# Mirror of https://github.com/open-thought/reasoning-gym.git
# Synced 2025-10-09 13:40:09 +03:00
# Commit: added games; added llama 3b training conf; update readme with
# details of external evals; readme update
# Co-authored-by: joesharratt1229 <joesharratt1229@gmail.com>
# File: 89 lines, 4.0 KiB, YAML
# Source dataset and the configuration name selected from it.
dataset_name: main
dataset_path: gsm8k

# Gold answer template: when the document defines `answer`, take the text after
# the last `####` marker and strip surrounding whitespace; otherwise fall back
# to the document's `target` field.
doc_to_target: '{{answer.split(''####'')[-1].strip() if answer is defined else target}}'

# Prompt template built from <|im_start|>/<|im_end|> delimited system, user and
# assistant turns; `{{question}}` is substituted per document.
# A literal block scalar (`|`) keeps every line break and exactly one trailing
# newline, so the value is identical to the former double-quoted string that
# spelled each break as `\n` and ended with `assistant\n`.
doc_to_text: |
  <|im_start|>system
  You are a helpful AI Assistant that provides well-reasoned and detailed responses.
  You first think about the reasoning process as an internal monologue and then provide the user with the answer.
  Respond in the following format:
  <think>
  ...
  </think>
  <answer>
  ...
  </answer><|im_end|>
  <|im_start|>user
  Given the following problem, reason and give a final answer to the problem.
  Problem: {{question}}
  Your response should end with "The final answer is [answer]" where [answer] is the response to the problem.<|im_end|>
  <|im_start|>assistant
# Few-shot examples. Each sample pairs a `question` with a worked `target`
# that ends in "The final answer is <number>".
# `>-` folds the wrapped lines into one space-separated line and strips the
# trailing newline, so every value is the same single-line string as the
# wrapped plain scalar it replaces.
fewshot_config:
  sampler: first_n
  samples:
    - question: >-
        There are 15 trees in the grove. Grove workers will plant trees in the
        grove today. After they are done, there will be 21 trees. How many trees did
        the grove workers plant today?
      target: >-
        There are 15 trees originally. Then there were 21 trees after some more
        were planted. So there must have been 21 - 15 = 6. The final answer is 6
    - question: >-
        If there are 3 cars in the parking lot and 2 more cars arrive, how many
        cars are in the parking lot?
      target: >-
        There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. The final answer
        is 5
    - question: >-
        Leah had 32 chocolates and her sister had 42. If they ate 35, how many
        pieces do they have left in total?
      target: >-
        Originally, Leah had 32 chocolates. Her sister had 42. So in total they
        had 32 + 42 = 74. After eating 35, they had 74 - 35 = 39. The final answer is 39
    - question: >-
        Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12
        lollipops. How many lollipops did Jason give to Denny?
      target: >-
        Jason started with 20 lollipops. Then he had 12 after giving some to Denny.
        So he gave Denny 20 - 12 = 8. The final answer is 8
    - question: >-
        Shawn has five toys. For Christmas, he got two toys each from his mom and
        dad. How many toys does he have now?
      target: >-
        Shawn started with 5 toys. If he got 2 toys each from his mom and dad,
        then that is 4 more toys. 5 + 4 = 9. The final answer is 9
    - question: >-
        There were nine computers in the server room. Five more computers were
        installed each day, from monday to thursday. How many computers are now in the
        server room?
      target: >-
        There were originally 9 computers. For each of 4 days, 5 more computers
        were added. So 5 * 4 = 20 computers were added. 9 + 20 is 29. The final answer is
        29
    - question: >-
        Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday,
        he lost 2 more. How many golf balls did he have at the end of wednesday?
      target: >-
        Michael started with 58 golf balls. After losing 23 on tuesday, he had
        58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The final answer
        is 33
    - question: >-
        Olivia has $23. She bought five bagels for $3 each. How much money does
        she have left?
      target: >-
        Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15
        dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The final answer is 8
# Answer-extraction pipelines. Each entry has a `name` and an ordered `filter`
# chain: a regex step followed by `take_first`.
# Patterns are single-quoted so backslashes, brackets and braces stay literal.
filter_list:
  # Requires the literal phrase "The final answer is" before the number.
  - filter:
      - function: regex
        # NOTE(review): -1 presumably selects the last match - confirm against
        # the harness's regex filter.
        group_select: -1
        regex_pattern: 'The final answer is ((-?[$0-9.,]{2,})|(-?[0-9]+))'
      - function: take_first
    name: strict-match
  # Accepts any number-like token, with no required lead-in phrase.
  - filter:
      - function: regex
        group_select: -1
        regex_pattern: '(-?[$0-9.,]{2,})|(-?[0-9]+)'
      - function: take_first
    name: flexible-extract
# Generation settings: sampling is enabled (temperature 0.6, top_p 0.95) with
# up to 4096 generated tokens.
generation_kwargs:
  do_sample: true
  temperature: 0.6
  top_p: 0.95
  max_gen_toks: 4096
  # Every entry is single-quoted so none of the angle-bracket, pipe or colon
  # characters can be read as YAML syntax; the string values are unchanged.
  until:
    - '<|eot_id|>'
    - '<|start_header_id|>user<|end_header_id|>'
    - 'Q:'
    - '</s>'
    - '<|im_end|>'
    - '</answer>'
# Tag(s) attached to this task.
tag:
  - chain_of_thought

metadata:
  # Left as an unquoted number on purpose: quoting it would change the parsed
  # type from float to string.
  version: 3.0
# Scoring: mean exact match, case-insensitive, punctuation kept.
metric_list:
  - aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: false
    metric: exact_match
    # Patterns listed for the metric to ignore: commas, dollar signs,
    # everything up to and including a "#### " marker, and a trailing period.
    # Single-quoted so the backslashes stay literal.
    regexes_to_ignore:
      - ','
      - '\$'
      - '(?s).*#### '
      - '\.$'
# Task identity and run settings. `num_fewshot: 8` equals the number of
# samples listed under `fewshot_config.samples`.
num_fewshot: 8
output_type: generate_until
repeats: 1
task: gsm8k_cot_rg
test_split: test