Files
claude-cookbooks/tool_evaluation/evaluation.xml
2025-09-10 23:43:39 +00:00

116 lines
3.4 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<evaluation>
<!-- Date/Time Calculations -->
<!-- Answer key check: 2024-03-15 to 2025-09-22 spans 556 days (365 to 2025-03-15, then 191 more); counting both endpoints adds 1. -->
<task>
  <prompt>How many days are between March 15, 2024 and September 22, 2025? Include both start and end dates in your count.</prompt>
  <response>557</response>
</task>
<!-- Answer key check: 45 min + 37 min = 82 min = 1 h 22 min, so 11 h + 2 h + 1 h 22 min = 14:22. -->
<task>
  <prompt>If a meeting starts at 11:45 AM and lasts for 2 hours and 37 minutes, what time does it end? Express in 24-hour format as HH:MM.</prompt>
  <response>14:22</response>
</task>
<!-- Modular Arithmetic -->
<!-- Answer key check: 2^3 = 8 leaves remainder 1 mod 7 and 100 = 3 * 33 + 1, so 2^100 leaves the same remainder as 2^1 = 2. -->
<task>
  <prompt>What is 2^100 mod 7? Give the exact integer result.</prompt>
  <response>2</response>
</task>
<!-- Answer key check: 1000 = 7 * 142 + 6, so the weekday advances by 6 days; Monday + 6 days = Sunday. -->
<task>
  <prompt>What day of the week will it be 1000 days from Monday?</prompt>
  <response>Sunday</response>
</task>
<!-- Factorial/Combinatorics -->
<!-- Answer key check: 14! = 87178291200, and 87178291200 * 15 = 1307674368000. -->
<task>
  <prompt>Calculate 15! (15 factorial). Give the exact integer result.</prompt>
  <response>1307674368000</response>
</task>
<!-- Answer key check: C(12,5) = (12 * 11 * 10 * 9 * 8) / 5! = 95040 / 120 = 792. -->
<task>
  <prompt>How many different ways can you choose 5 items from a set of 12 items? (Calculate C(12,5))</prompt>
  <response>792</response>
</task>
<!-- Trigonometric Functions -->
<!-- Answer key check: sin(π/6) = 1/2, cos(π/3) = 1/2, tan(π/4) = 1; the sum is 2. -->
<task>
  <prompt>Calculate sin(π/6) + cos(π/3) + tan(π/4). Give the exact value.</prompt>
  <response>2</response>
</task>
<!-- Logarithms/Exponentials -->
<!-- Answer key check: 128 = 2^7, so x = 7. -->
<task>
  <prompt>Solve for x: 2^x = 128. Give the exact integer value.</prompt>
  <response>7</response>
</task>
<!-- Answer key check: ln(e^3) = 3, log base 10 of 1000 = 3, log base 2 of 8 = 3; 3 + 3 - 3 = 3. -->
<task>
  <prompt>Calculate ln(e^3) + log₁₀(1000) - log₂(8). Give the exact value.</prompt>
  <response>3</response>
</task>
<!-- Matrix Operations -->
<!-- Answer key check: det = 3 * 5 - 7 * 2 = 15 - 14 = 1. -->
<task>
  <prompt>Calculate the determinant of the 2x2 matrix [[3, 7], [2, 5]].</prompt>
  <response>1</response>
</task>
<!-- Number Theory -->
<!-- Answer key check (Euclid): 1071 = 2 * 462 + 147; 462 = 3 * 147 + 21; 147 = 7 * 21 + 0, so the GCD is 21. -->
<task>
  <prompt>What is the greatest common divisor (GCD) of 1071 and 462?</prompt>
  <response>21</response>
</task>
<!-- Answer key check: the primes up to sqrt(97) (below 10) are 2, 3, 5 and 7, and none of them divides 97. -->
<task>
  <prompt>Is 97 a prime number? Answer 'true' or 'false'.</prompt>
  <response>true</response>
</task>
<!-- Bitwise Operations -->
<!-- Answer key check: 42 = 101010 and 15 = 001111 in binary; XOR gives 100101 = 37. -->
<task>
  <prompt>Calculate 42 XOR 15 (bitwise exclusive OR).</prompt>
  <response>37</response>
</task>
<!-- Floor/Ceiling Functions -->
<!-- Answer key check: floor(7.8) = 7 and ceiling(2.1) = 3, so the product is 21. The expected 25 requires round(4.5) = 4, which is round-half-to-even; round-half-up would give 5 and a total of 26. The prompt states the rounding rule so that the expected answer is unambiguous. -->
<task>
  <prompt>Calculate floor(7.8) × ceiling(2.1) + round(4.5), where round() rounds halves to the nearest even integer (banker's rounding).</prompt>
  <response>25</response>
</task>
<!-- Complex Numbers -->
<!-- Answer key check: magnitude = sqrt(3^2 + 4^2) = sqrt(25) = 5. -->
<task>
  <prompt>Calculate the magnitude of the complex number 3 + 4i.</prompt>
  <response>5</response>
</task>
<!-- Base Conversions -->
<!-- Answer key check: hexadecimal F is 15, so FF = 15 * 16 + 15 = 255. -->
<task>
  <prompt>Convert the hexadecimal number FF to decimal.</prompt>
  <response>255</response>
</task>
<!-- Statistical Functions -->
<!-- Answer key check: sorted, the data is 1, 2, 3, 5, 7, 8, 9; the middle (4th of 7) value is 5. -->
<task>
  <prompt>Calculate the median of this dataset: [3, 7, 2, 9, 1, 5, 8].</prompt>
  <response>5</response>
</task>
<!-- Recursive Calculations -->
<!-- Answer key check: with F(1) = F(2) = 1 the sequence runs 1, 1, 2, 3, 5, 8, 13, 21, 34, 55; the 10th term is 55. -->
<task>
  <prompt>Calculate the 10th Fibonacci number (where F(1)=1, F(2)=1).</prompt>
  <response>55</response>
</task>
<!-- Percentage of Percentage -->
<!-- Answer key check: 80% of 500 = 400; 40% of 400 = 160; 25% of 160 = 40. -->
<task>
  <prompt>What is 25% of 40% of 80% of 500?</prompt>
  <response>40</response>
</task>
<!-- Unit Conversions -->
<!-- Answer key check: (72 - 32) * 5 / 9 = 200 / 9 = 22.22..., which rounds to 22.2. -->
<task>
  <prompt>Convert 72 degrees Fahrenheit to Celsius. Round to 1 decimal place.</prompt>
  <response>22.2</response>
</task>
</evaluation>