mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2025-10-09 13:40:09 +03:00
final tweaks
This commit is contained in:
@@ -552,9 +552,49 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "371d38e1fe9e41d587b2cfa64ca9ef91",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/7053 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n",
|
||||
"Response 404\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"full_sampling_fails: 913\n",
|
||||
"warmup_fails: 528\n",
|
||||
"missing_input_generator: 36\n",
|
||||
"cannot_initialize_code: 98\n",
|
||||
"Total errors: 1575\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CODE_TEMPLATE = \"\"\"from random import Random\n",
|
||||
"{code_sample}\n",
|
||||
@@ -584,7 +624,7 @@
|
||||
" iterator = tqdm(enumerate(f_in), total=total_entries)\n",
|
||||
"\n",
|
||||
" for i, line in iterator:\n",
|
||||
" iterator.set_description(f\"Failures:\" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
|
||||
" iterator.set_description(f\"Failures: \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
|
||||
" entry = json.loads(line)\n",
|
||||
"\n",
|
||||
" if not \"input_generator\" in entry:\n",
|
||||
@@ -652,72 +692,6 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"errors = defaultdict(int)\n",
|
||||
"total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
|
||||
"\n",
|
||||
"# with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
|
||||
"# open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",
|
||||
"\n",
|
||||
"# iterator = tqdm(enumerate(f_in), total=total_entries)\n",
|
||||
"\n",
|
||||
"# for i, line in iterator:\n",
|
||||
"# iterator.set_description(f\"Processing {i}/{total_entries} | \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
|
||||
"# entry = json.loads(line)\n",
|
||||
"# # Check if input generator is present\n",
|
||||
"# if not \"input_generator\" in entry:\n",
|
||||
"# errors[\"missing_input_generator\"] += 1\n",
|
||||
"# continue\n",
|
||||
" \n",
|
||||
"# # Check if input generator is valid function\n",
|
||||
"# try:\n",
|
||||
"# input_generator_func = get_input_generator_func(entry['code_sample'], entry['input_generator'])\n",
|
||||
"# except Exception as e:\n",
|
||||
"# errors[\"cannot_instantiate_input_generator\"] += 1\n",
|
||||
"# continue\n",
|
||||
"\n",
|
||||
"# skip = False\n",
|
||||
"# seen_inputs, seen_outputs = set(), set()\n",
|
||||
"\n",
|
||||
"# for _ in range(NUM_INPUT_GENERATE):\n",
|
||||
"# try:\n",
|
||||
"# # Check if you can generate input\n",
|
||||
"# signal.alarm(ALARM_TOLERANCE)\n",
|
||||
"# random_input = input_generator_func(rng)\n",
|
||||
"# signal.alarm(0)\n",
|
||||
"# seen_inputs.add(hash(json.dumps(random_input)))\n",
|
||||
"\n",
|
||||
"# # Check if code snippet can execute with generated input\n",
|
||||
"# signal.alarm(ALARM_TOLERANCE)\n",
|
||||
"# random_output = execute_code_sample(entry[\"code_sample\"], random_input)\n",
|
||||
"# signal.alarm(0)\n",
|
||||
"# seen_outputs.add(hash(json.dumps(random_output)))\n",
|
||||
"# except Exception as e:\n",
|
||||
"# signal.alarm(0)\n",
|
||||
"# errors[\"unreliable_input_generator\"] += 1\n",
|
||||
"# skip = True\n",
|
||||
"# break\n",
|
||||
"# if skip: \n",
|
||||
"# continue\n",
|
||||
" \n",
|
||||
"# if len(seen_inputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_INPUTS:\n",
|
||||
"# errors[\"insufficient_unique_inputs\"] += 1\n",
|
||||
"# continue\n",
|
||||
" \n",
|
||||
"# if len(seen_outputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_OUTPUTS:\n",
|
||||
"# errors[\"insufficient_unique_outputs\"] += 1\n",
|
||||
"# continue\n",
|
||||
"\n",
|
||||
"# f_out.write(json.dumps(entry))\n",
|
||||
"# f_out.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"# for k, v in errors.items():\n",
|
||||
"# print(f\"{k}: {v}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user