mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2025-10-09 13:40:09 +03:00
update gsmcross-check status
This commit is contained in:
@@ -246,7 +246,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -263,12 +263,12 @@
|
||||
"[1.2], llm=42, ground_truth=42, match=True\n",
|
||||
"[1.3], llm=76, ground_truth=76, match=True\n",
|
||||
"[1.4], llm=80, ground_truth=80, match=True\n",
|
||||
"[2.0], llm=47, ground_truth=47, match=True\n",
|
||||
"[2.1], llm=62, ground_truth=61, match=False\n",
|
||||
"[2.2], llm=36, ground_truth=36, match=True\n",
|
||||
"[2.3], llm=41, ground_truth=40, match=False\n",
|
||||
"[2.4], llm=70, ground_truth=70, match=True\n",
|
||||
"[3.0], llm=55, ground_truth=55, match=True\n",
|
||||
"[2.0], llm=84, ground_truth=84, match=True\n",
|
||||
"[2.1], llm=91, ground_truth=91, match=True\n",
|
||||
"[2.2], llm=79, ground_truth=79, match=True\n",
|
||||
"[2.3], llm=60, ground_truth=60, match=True\n",
|
||||
"[2.4], llm=72, ground_truth=72, match=True\n",
|
||||
"[3.0], llm=110, ground_truth=110, match=True\n",
|
||||
"[3.1], llm=8, ground_truth=8, match=True\n",
|
||||
"[3.2], llm=33, ground_truth=33, match=True\n",
|
||||
"[3.3], llm=7, ground_truth=7, match=True\n",
|
||||
@@ -308,11 +308,11 @@
|
||||
"[10.2], llm=24, ground_truth=24, match=True\n",
|
||||
"[10.3], llm=67, ground_truth=67, match=True\n",
|
||||
"[10.4], llm=90, ground_truth=90, match=True\n",
|
||||
"[11.0], llm=401, ground_truth=330, match=False\n",
|
||||
"[11.1], llm=481, ground_truth=386, match=False\n",
|
||||
"[11.2], llm=219, ground_truth=390, match=False\n",
|
||||
"[11.3], llm=212, ground_truth=386, match=False\n",
|
||||
"[11.4], llm=268, ground_truth=231, match=False\n",
|
||||
"[11.0], llm=330, ground_truth=330, match=True\n",
|
||||
"[11.1], llm=386, ground_truth=386, match=True\n",
|
||||
"[11.2], llm=390, ground_truth=390, match=True\n",
|
||||
"[11.3], llm=386, ground_truth=386, match=True\n",
|
||||
"[11.4], llm=231, ground_truth=231, match=True\n",
|
||||
"[12.0], llm=351, ground_truth=351, match=True\n",
|
||||
"[12.1], llm=269, ground_truth=269, match=True\n",
|
||||
"[12.2], llm=286, ground_truth=286, match=True\n",
|
||||
@@ -358,347 +358,347 @@
|
||||
"[20.2], llm=1, ground_truth=1, match=True\n",
|
||||
"[20.3], llm=3, ground_truth=3, match=True\n",
|
||||
"[20.4], llm=3, ground_truth=3, match=True\n",
|
||||
"[21.0], llm=888, ground_truth=888, match=True\n",
|
||||
"[21.1], llm=306, ground_truth=498, match=False\n",
|
||||
"[21.2], llm=738, ground_truth=738, match=True\n",
|
||||
"[21.3], llm=360, ground_truth=648, match=False\n",
|
||||
"[21.4], llm=0, ground_truth=-80, match=False\n",
|
||||
"[22.0], llm=8800, ground_truth=8800, match=True\n",
|
||||
"[22.1], llm=46000, ground_truth=46000, match=True\n",
|
||||
"[22.2], llm=51200, ground_truth=51200, match=True\n",
|
||||
"[22.3], llm=69800, ground_truth=69800, match=True\n",
|
||||
"[22.4], llm=67400, ground_truth=67400, match=True\n",
|
||||
"[23.0], llm=478, ground_truth=478, match=True\n",
|
||||
"[23.1], llm=389, ground_truth=389, match=True\n",
|
||||
"[23.2], llm=206, ground_truth=206, match=True\n",
|
||||
"[23.3], llm=99, ground_truth=99, match=True\n",
|
||||
"[23.4], llm=389, ground_truth=389, match=True\n",
|
||||
"[24.0], llm=29, ground_truth=29, match=True\n",
|
||||
"[24.1], llm=20, ground_truth=20, match=True\n",
|
||||
"[24.2], llm=3, ground_truth=3, match=True\n",
|
||||
"[24.3], llm=41, ground_truth=41, match=True\n",
|
||||
"[24.4], llm=1, ground_truth=1, match=True\n",
|
||||
"[25.0], llm=48, ground_truth=48, match=True\n",
|
||||
"[25.1], llm=22, ground_truth=22, match=True\n",
|
||||
"[25.2], llm=10, ground_truth=10, match=True\n",
|
||||
"[25.3], llm=15, ground_truth=15, match=True\n",
|
||||
"[25.4], llm=14, ground_truth=14, match=True\n",
|
||||
"[26.0], llm=1, ground_truth=1, match=True\n",
|
||||
"[26.1], llm=2, ground_truth=2, match=True\n",
|
||||
"[26.2], llm=9, ground_truth=9, match=True\n",
|
||||
"[26.3], llm=2, ground_truth=2, match=True\n",
|
||||
"[26.4], llm=8, ground_truth=8, match=True\n",
|
||||
"[27.0], llm=9800, ground_truth=9800, match=True\n",
|
||||
"[27.1], llm=3864, ground_truth=3864, match=True\n",
|
||||
"[27.2], llm=8930.25, ground_truth=8930.25, match=True\n",
|
||||
"[27.3], llm=2868.75, ground_truth=2868.75, match=True\n",
|
||||
"[27.4], llm=787.50, ground_truth=787.5, match=False\n",
|
||||
"[28.0], llm=200, ground_truth=200, match=True\n",
|
||||
"[28.1], llm=324, ground_truth=324, match=True\n",
|
||||
"[28.2], llm=214, ground_truth=214, match=True\n",
|
||||
"[28.3], llm=568, ground_truth=568, match=True\n",
|
||||
"[28.4], llm=295, ground_truth=295, match=True\n",
|
||||
"[21.0], llm=630, ground_truth=630, match=True\n",
|
||||
"[21.1], llm=525, ground_truth=525, match=True\n",
|
||||
"[21.2], llm=504, ground_truth=504, match=True\n",
|
||||
"[21.3], llm=350, ground_truth=350, match=True\n",
|
||||
"[21.4], llm=475, ground_truth=475, match=True\n",
|
||||
"[22.0], llm=19500, ground_truth=19500, match=True\n",
|
||||
"[22.1], llm=20800, ground_truth=20800, match=True\n",
|
||||
"[22.2], llm=69800, ground_truth=69800, match=True\n",
|
||||
"[22.3], llm=67400, ground_truth=67400, match=True\n",
|
||||
"[22.4], llm=33100, ground_truth=33100, match=True\n",
|
||||
"[23.0], llm=305, ground_truth=305, match=True\n",
|
||||
"[23.1], llm=206, ground_truth=206, match=True\n",
|
||||
"[23.2], llm=99, ground_truth=99, match=True\n",
|
||||
"[23.3], llm=389, ground_truth=389, match=True\n",
|
||||
"[23.4], llm=86, ground_truth=86, match=True\n",
|
||||
"[24.0], llm=20, ground_truth=20, match=True\n",
|
||||
"[24.1], llm=3, ground_truth=3, match=True\n",
|
||||
"[24.2], llm=41, ground_truth=41, match=True\n",
|
||||
"[24.3], llm=1, ground_truth=1, match=True\n",
|
||||
"[24.4], llm=3, ground_truth=3, match=True\n",
|
||||
"[25.0], llm=36, ground_truth=36, match=True\n",
|
||||
"[25.1], llm=42, ground_truth=42, match=True\n",
|
||||
"[25.2], llm=54, ground_truth=54, match=True\n",
|
||||
"[25.3], llm=28, ground_truth=28, match=True\n",
|
||||
"[25.4], llm=36, ground_truth=36, match=True\n",
|
||||
"[26.0], llm=2, ground_truth=2, match=True\n",
|
||||
"[26.1], llm=9, ground_truth=9, match=True\n",
|
||||
"[26.2], llm=2, ground_truth=2, match=True\n",
|
||||
"[26.3], llm=8, ground_truth=8, match=True\n",
|
||||
"[26.4], llm=6, ground_truth=6, match=True\n",
|
||||
"[27.0], llm=2916, ground_truth=2916, match=True\n",
|
||||
"[27.1], llm=3510, ground_truth=3510, match=True\n",
|
||||
"[27.2], llm=990, ground_truth=990, match=True\n",
|
||||
"[27.3], llm=3150, ground_truth=3150, match=True\n",
|
||||
"[27.4], llm=6063.75, ground_truth=6063.75, match=True\n",
|
||||
"[28.0], llm=570, ground_truth=570, match=True\n",
|
||||
"[28.1], llm=610, ground_truth=610, match=True\n",
|
||||
"[28.2], llm=382, ground_truth=382, match=True\n",
|
||||
"[28.3], llm=257, ground_truth=257, match=True\n",
|
||||
"[28.4], llm=467, ground_truth=467, match=True\n",
|
||||
"[29.0], llm=20, ground_truth=20, match=True\n",
|
||||
"[29.1], llm=25, ground_truth=25, match=True\n",
|
||||
"[29.2], llm=20, ground_truth=20, match=True\n",
|
||||
"[29.1], llm=20, ground_truth=20, match=True\n",
|
||||
"[29.2], llm=25, ground_truth=25, match=True\n",
|
||||
"[29.3], llm=20, ground_truth=20, match=True\n",
|
||||
"[29.4], llm=25, ground_truth=25, match=True\n",
|
||||
"[30.0], llm=50, ground_truth=50, match=True\n",
|
||||
"[30.1], llm=20, ground_truth=20, match=True\n",
|
||||
"[30.2], llm=40, ground_truth=40, match=True\n",
|
||||
"[30.3], llm=58, ground_truth=58, match=True\n",
|
||||
"[30.4], llm=89, ground_truth=89, match=True\n",
|
||||
"[31.0], llm=26, ground_truth=26, match=True\n",
|
||||
"[31.1], llm=34, ground_truth=34, match=True\n",
|
||||
"[31.2], llm=26, ground_truth=26, match=True\n",
|
||||
"[31.3], llm=24, ground_truth=24, match=True\n",
|
||||
"[31.4], llm=26, ground_truth=26, match=True\n",
|
||||
"[32.0], llm=70, ground_truth=70, match=True\n",
|
||||
"[32.1], llm=33, ground_truth=33, match=True\n",
|
||||
"[32.2], llm=45, ground_truth=45, match=True\n",
|
||||
"[32.3], llm=42, ground_truth=42, match=True\n",
|
||||
"[32.4], llm=90, ground_truth=120, match=False\n",
|
||||
"[33.0], llm=2695, ground_truth=2695, match=True\n",
|
||||
"[33.1], llm=1715, ground_truth=1715, match=True\n",
|
||||
"[33.2], llm=2940, ground_truth=2940, match=True\n",
|
||||
"[33.3], llm=1764, ground_truth=1764, match=True\n",
|
||||
"[33.4], llm=1960, ground_truth=1960, match=True\n",
|
||||
"[34.0], llm=89, ground_truth=89, match=True\n",
|
||||
"[34.1], llm=13, ground_truth=13, match=True\n",
|
||||
"[34.2], llm=63, ground_truth=63, match=True\n",
|
||||
"[34.3], llm=116, ground_truth=116, match=True\n",
|
||||
"[34.4], llm=52, ground_truth=52, match=True\n",
|
||||
"[35.0], llm=30, ground_truth=30, match=True\n",
|
||||
"[35.1], llm=27, ground_truth=26, match=False\n",
|
||||
"[35.2], llm=70, ground_truth=70, match=True\n",
|
||||
"[35.3], llm=60.9375, ground_truth=60, match=False\n",
|
||||
"[35.4], llm=50.83, ground_truth=50, match=False\n",
|
||||
"[36.0], llm=52, ground_truth=52, match=True\n",
|
||||
"[36.1], llm=78, ground_truth=78, match=True\n",
|
||||
"[36.2], llm=25, ground_truth=25, match=True\n",
|
||||
"[36.3], llm=36, ground_truth=36, match=True\n",
|
||||
"[36.4], llm=60, ground_truth=60, match=True\n",
|
||||
"[37.0], llm=18630, ground_truth=18630, match=True\n",
|
||||
"[37.1], llm=18451.2, ground_truth=17856, match=False\n",
|
||||
"[37.2], llm=32640, ground_truth=32640, match=True\n",
|
||||
"[37.3], llm=25344, ground_truth=25344, match=True\n",
|
||||
"[37.4], llm=15642.6, ground_truth=15283, match=False\n",
|
||||
"[38.0], llm=174, ground_truth=174, match=True\n",
|
||||
"[38.1], llm=200, ground_truth=200, match=True\n",
|
||||
"[38.2], llm=365, ground_truth=365, match=True\n",
|
||||
"[38.3], llm=272, ground_truth=272, match=True\n",
|
||||
"[38.4], llm=268, ground_truth=268, match=True\n",
|
||||
"[39.0], llm=38, ground_truth=38, match=True\n",
|
||||
"[39.1], llm=24, ground_truth=24, match=True\n",
|
||||
"[39.2], llm=40, ground_truth=40, match=True\n",
|
||||
"[39.3], llm=44, ground_truth=44, match=True\n",
|
||||
"[39.4], llm=117, ground_truth=117, match=True\n",
|
||||
"[40.0], llm=352, ground_truth=352, match=True\n",
|
||||
"[40.1], llm=132, ground_truth=132, match=True\n",
|
||||
"[40.2], llm=198, ground_truth=198, match=True\n",
|
||||
"[40.3], llm=290, ground_truth=290, match=True\n",
|
||||
"[40.4], llm=252, ground_truth=252, match=True\n",
|
||||
"[41.0], llm=235, ground_truth=235, match=True\n",
|
||||
"[41.1], llm=415, ground_truth=415, match=True\n",
|
||||
"[41.2], llm=290, ground_truth=290, match=True\n",
|
||||
"[41.3], llm=305, ground_truth=305, match=True\n",
|
||||
"[41.4], llm=170, ground_truth=170, match=True\n",
|
||||
"[42.0], llm=229400, ground_truth=229400, match=True\n",
|
||||
"[42.1], llm=85300, ground_truth=85300, match=True\n",
|
||||
"[42.2], llm=548800, ground_truth=548800, match=True\n",
|
||||
"[42.3], llm=300700, ground_truth=300700, match=True\n",
|
||||
"[42.4], llm=414400, ground_truth=414400, match=True\n",
|
||||
"[43.0], llm=7, ground_truth=7, match=True\n",
|
||||
"[43.1], llm=8, ground_truth=8, match=True\n",
|
||||
"[43.2], llm=34, ground_truth=34, match=True\n",
|
||||
"[43.3], llm=9, ground_truth=9, match=True\n",
|
||||
"[43.4], llm=21, ground_truth=21, match=True\n",
|
||||
"[44.0], llm=183, ground_truth=183, match=True\n",
|
||||
"[44.1], llm=301, ground_truth=301, match=True\n",
|
||||
"[44.2], llm=197, ground_truth=197, match=True\n",
|
||||
"[44.3], llm=369, ground_truth=369, match=True\n",
|
||||
"[44.4], llm=432, ground_truth=432, match=True\n",
|
||||
"[45.0], llm=25, ground_truth=25, match=True\n",
|
||||
"[45.1], llm=13, ground_truth=13, match=True\n",
|
||||
"[45.2], llm=22, ground_truth=22, match=True\n",
|
||||
"[45.3], llm=17, ground_truth=17, match=True\n",
|
||||
"[45.4], llm=18, ground_truth=18, match=True\n",
|
||||
"[46.0], llm=22, ground_truth=22, match=True\n",
|
||||
"[46.1], llm=18, ground_truth=18, match=True\n",
|
||||
"[46.2], llm=15, ground_truth=15, match=True\n",
|
||||
"[46.3], llm=30, ground_truth=30, match=True\n",
|
||||
"[46.4], llm=13, ground_truth=13, match=True\n",
|
||||
"[47.0], llm=139, ground_truth=139, match=True\n",
|
||||
"[47.1], llm=187, ground_truth=187, match=True\n",
|
||||
"[47.2], llm=292, ground_truth=292, match=True\n",
|
||||
"[47.3], llm=248, ground_truth=248, match=True\n",
|
||||
"[47.4], llm=225, ground_truth=225, match=True\n",
|
||||
"[48.0], llm=31, ground_truth=31, match=True\n",
|
||||
"[48.1], llm=15, ground_truth=15, match=True\n",
|
||||
"[48.2], llm=5, ground_truth=5, match=True\n",
|
||||
"[48.3], llm=50, ground_truth=50, match=True\n",
|
||||
"[48.4], llm=11, ground_truth=11, match=True\n",
|
||||
"[49.0], llm=770, ground_truth=770, match=True\n",
|
||||
"[49.1], llm=810, ground_truth=810, match=True\n",
|
||||
"[49.2], llm=749, ground_truth=749, match=True\n",
|
||||
"[49.3], llm=1799, ground_truth=1799, match=True\n",
|
||||
"[49.4], llm=1150, ground_truth=1150, match=True\n",
|
||||
"[50.0], llm=633, ground_truth=633, match=True\n",
|
||||
"[50.1], llm=642, ground_truth=642, match=True\n",
|
||||
"[50.2], llm=695, ground_truth=695, match=True\n",
|
||||
"[50.3], llm=855, ground_truth=855, match=True\n",
|
||||
"[50.4], llm=1135, ground_truth=1135, match=True\n",
|
||||
"[51.0], llm=4, ground_truth=4, match=True\n",
|
||||
"[51.1], llm=5, ground_truth=5, match=True\n",
|
||||
"[51.2], llm=4, ground_truth=4, match=True\n",
|
||||
"[51.3], llm=1, ground_truth=1, match=True\n",
|
||||
"[51.4], llm=2, ground_truth=2, match=True\n",
|
||||
"[52.0], llm=312, ground_truth=312, match=True\n",
|
||||
"[52.1], llm=140, ground_truth=140, match=True\n",
|
||||
"[52.2], llm=224, ground_truth=224, match=True\n",
|
||||
"[52.3], llm=312, ground_truth=312, match=True\n",
|
||||
"[52.4], llm=408, ground_truth=408, match=True\n",
|
||||
"[53.0], llm=25, ground_truth=25, match=True\n",
|
||||
"[29.4], llm=20, ground_truth=20, match=True\n",
|
||||
"[30.0], llm=17, ground_truth=17, match=True\n",
|
||||
"[30.1], llm=26, ground_truth=26, match=True\n",
|
||||
"[30.2], llm=93, ground_truth=93, match=True\n",
|
||||
"[30.3], llm=81, ground_truth=81, match=True\n",
|
||||
"[30.4], llm=26, ground_truth=26, match=True\n",
|
||||
"[31.0], llm=24, ground_truth=24, match=True\n",
|
||||
"[31.1], llm=26, ground_truth=26, match=True\n",
|
||||
"[31.2], llm=32, ground_truth=32, match=True\n",
|
||||
"[31.3], llm=30, ground_truth=30, match=True\n",
|
||||
"[31.4], llm=22, ground_truth=22, match=True\n",
|
||||
"[32.0], llm=52.5, ground_truth=63, match=False\n",
|
||||
"[32.1], llm=27, ground_truth=27, match=True\n",
|
||||
"[32.2], llm=60, ground_truth=100, match=False\n",
|
||||
"[32.3], llm=42, ground_truth=84, match=False\n",
|
||||
"[32.4], llm=30, ground_truth=30, match=True\n",
|
||||
"[33.0], llm=1715, ground_truth=1715, match=True\n",
|
||||
"[33.1], llm=1568, ground_truth=1568, match=True\n",
|
||||
"[33.2], llm=1568, ground_truth=1568, match=True\n",
|
||||
"[33.3], llm=1960, ground_truth=1960, match=True\n",
|
||||
"[33.4], llm=1029, ground_truth=1029, match=True\n",
|
||||
"[34.0], llm=1, ground_truth=1, match=True\n",
|
||||
"[34.1], llm=78, ground_truth=78, match=True\n",
|
||||
"[34.2], llm=4, ground_truth=4, match=True\n",
|
||||
"[34.3], llm=25, ground_truth=25, match=True\n",
|
||||
"[34.4], llm=151, ground_truth=151, match=True\n",
|
||||
"[35.0], llm=60, ground_truth=60, match=True\n",
|
||||
"[35.1], llm=51.76, ground_truth=51, match=False\n",
|
||||
"[35.2], llm=37, ground_truth=37, match=True\n",
|
||||
"[35.3], llm=23.33, ground_truth=23, match=False\n",
|
||||
"[35.4], llm=43.11, ground_truth=43, match=False\n",
|
||||
"[36.0], llm=75, ground_truth=75, match=True\n",
|
||||
"[36.1], llm=90, ground_truth=90, match=True\n",
|
||||
"[36.2], llm=27, ground_truth=27, match=True\n",
|
||||
"[36.3], llm=63, ground_truth=63, match=True\n",
|
||||
"[36.4], llm=34, ground_truth=34, match=True\n",
|
||||
"[37.0], llm=38454, ground_truth=38454, match=True\n",
|
||||
"[37.1], llm=30856, ground_truth=30856, match=True\n",
|
||||
"[37.2], llm=10962, ground_truth=10710, match=False\n",
|
||||
"[37.3], llm=15590.4, ground_truth=15232, match=False\n",
|
||||
"[37.4], llm=16224, ground_truth=16224, match=True\n",
|
||||
"[38.0], llm=159, ground_truth=159, match=True\n",
|
||||
"[38.1], llm=284, ground_truth=284, match=True\n",
|
||||
"[38.2], llm=325, ground_truth=325, match=True\n",
|
||||
"[38.3], llm=126, ground_truth=126, match=True\n",
|
||||
"[38.4], llm=285, ground_truth=285, match=True\n",
|
||||
"[39.0], llm=54, ground_truth=54, match=True\n",
|
||||
"[39.1], llm=25, ground_truth=25, match=True\n",
|
||||
"[39.2], llm=23, ground_truth=23, match=True\n",
|
||||
"[39.3], llm=52, ground_truth=52, match=True\n",
|
||||
"[39.4], llm=53, ground_truth=53, match=True\n",
|
||||
"[40.0], llm=96, ground_truth=96, match=True\n",
|
||||
"[40.1], llm=184, ground_truth=184, match=True\n",
|
||||
"[40.2], llm=134, ground_truth=134, match=True\n",
|
||||
"[40.3], llm=190, ground_truth=190, match=True\n",
|
||||
"[40.4], llm=320, ground_truth=320, match=True\n",
|
||||
"[41.0], llm=230, ground_truth=230, match=True\n",
|
||||
"[41.1], llm=165, ground_truth=165, match=True\n",
|
||||
"[41.2], llm=445, ground_truth=445, match=True\n",
|
||||
"[41.3], llm=195, ground_truth=195, match=True\n",
|
||||
"[41.4], llm=260, ground_truth=260, match=True\n",
|
||||
"[42.0], llm=171500, ground_truth=171500, match=True\n",
|
||||
"[42.1], llm=429600, ground_truth=429600, match=True\n",
|
||||
"[42.2], llm=100400, ground_truth=100400, match=True\n",
|
||||
"[42.3], llm=636000, ground_truth=636000, match=True\n",
|
||||
"[42.4], llm=490000, ground_truth=490000, match=True\n",
|
||||
"[43.0], llm=16, ground_truth=16, match=True\n",
|
||||
"[43.1], llm=20, ground_truth=20, match=True\n",
|
||||
"[43.2], llm=20, ground_truth=20, match=True\n",
|
||||
"[43.3], llm=27, ground_truth=27, match=True\n",
|
||||
"[43.4], llm=11, ground_truth=11, match=True\n",
|
||||
"[44.0], llm=417, ground_truth=417, match=True\n",
|
||||
"[44.1], llm=420, ground_truth=420, match=True\n",
|
||||
"[44.2], llm=674, ground_truth=674, match=True\n",
|
||||
"[44.3], llm=374, ground_truth=374, match=True\n",
|
||||
"[44.4], llm=500, ground_truth=500, match=True\n",
|
||||
"[45.0], llm=15, ground_truth=15, match=True\n",
|
||||
"[45.1], llm=29, ground_truth=29, match=True\n",
|
||||
"[45.2], llm=23, ground_truth=23, match=True\n",
|
||||
"[45.3], llm=23, ground_truth=23, match=True\n",
|
||||
"[45.4], llm=11, ground_truth=11, match=True\n",
|
||||
"[46.0], llm=26, ground_truth=26, match=True\n",
|
||||
"[46.1], llm=16, ground_truth=16, match=True\n",
|
||||
"[46.2], llm=23, ground_truth=23, match=True\n",
|
||||
"[46.3], llm=18, ground_truth=18, match=True\n",
|
||||
"[46.4], llm=18, ground_truth=18, match=True\n",
|
||||
"[47.0], llm=385, ground_truth=385, match=True\n",
|
||||
"[47.1], llm=156, ground_truth=156, match=True\n",
|
||||
"[47.2], llm=415, ground_truth=415, match=True\n",
|
||||
"[47.3], llm=149, ground_truth=149, match=True\n",
|
||||
"[47.4], llm=306, ground_truth=306, match=True\n",
|
||||
"[48.0], llm=20, ground_truth=20, match=True\n",
|
||||
"[48.1], llm=43, ground_truth=43, match=True\n",
|
||||
"[48.2], llm=6, ground_truth=6, match=True\n",
|
||||
"[48.3], llm=17, ground_truth=17, match=True\n",
|
||||
"[48.4], llm=43, ground_truth=43, match=True\n",
|
||||
"[49.0], llm=620, ground_truth=620, match=True\n",
|
||||
"[49.1], llm=366, ground_truth=366, match=True\n",
|
||||
"[49.2], llm=670, ground_truth=670, match=True\n",
|
||||
"[49.3], llm=1345, ground_truth=1345, match=True\n",
|
||||
"[49.4], llm=616, ground_truth=616, match=True\n",
|
||||
"[50.0], llm=983, ground_truth=983, match=True\n",
|
||||
"[50.1], llm=1084, ground_truth=1084, match=True\n",
|
||||
"[50.2], llm=862, ground_truth=862, match=True\n",
|
||||
"[50.3], llm=988, ground_truth=988, match=True\n",
|
||||
"[50.4], llm=591, ground_truth=591, match=True\n",
|
||||
"[51.0], llm=3, ground_truth=2, match=False\n",
|
||||
"[51.1], llm=7, ground_truth=7, match=True\n",
|
||||
"[51.2], llm=5, ground_truth=5, match=True\n",
|
||||
"[51.3], llm=7, ground_truth=7, match=True\n",
|
||||
"[51.4], llm=8, ground_truth=7, match=False\n",
|
||||
"[52.0], llm=288, ground_truth=288, match=True\n",
|
||||
"[52.1], llm=272, ground_truth=272, match=True\n",
|
||||
"[52.2], llm=238, ground_truth=238, match=True\n",
|
||||
"[52.3], llm=224, ground_truth=224, match=True\n",
|
||||
"[52.4], llm=130, ground_truth=130, match=True\n",
|
||||
"[53.0], llm=65, ground_truth=65, match=True\n",
|
||||
"[53.1], llm=25, ground_truth=25, match=True\n",
|
||||
"[53.2], llm=25, ground_truth=25, match=True\n",
|
||||
"[53.3], llm=25, ground_truth=25, match=True\n",
|
||||
"[53.4], llm=50, ground_truth=50, match=True\n",
|
||||
"[54.0], llm=59, ground_truth=59, match=True\n",
|
||||
"[54.1], llm=19, ground_truth=19, match=True\n",
|
||||
"[54.2], llm=16, ground_truth=16, match=True\n",
|
||||
"[54.3], llm=9, ground_truth=9, match=True\n",
|
||||
"[54.4], llm=36, ground_truth=36, match=True\n",
|
||||
"[55.0], llm=237, ground_truth=237, match=True\n",
|
||||
"[53.2], llm=50, ground_truth=50, match=True\n",
|
||||
"[53.3], llm=50, ground_truth=50, match=True\n",
|
||||
"[53.4], llm=25, ground_truth=25, match=True\n",
|
||||
"[54.0], llm=32, ground_truth=32, match=True\n",
|
||||
"[54.1], llm=80, ground_truth=80, match=True\n",
|
||||
"[54.2], llm=20, ground_truth=20, match=True\n",
|
||||
"[54.3], llm=13, ground_truth=13, match=True\n",
|
||||
"[54.4], llm=53, ground_truth=53, match=True\n",
|
||||
"[55.0], llm=300, ground_truth=300, match=True\n",
|
||||
"[55.1], llm=159, ground_truth=159, match=True\n",
|
||||
"[55.2], llm=216, ground_truth=216, match=True\n",
|
||||
"[55.3], llm=123, ground_truth=123, match=True\n",
|
||||
"[55.4], llm=87, ground_truth=87, match=True\n",
|
||||
"[56.0], llm=5570, ground_truth=5570, match=True\n",
|
||||
"[56.1], llm=5005, ground_truth=5005, match=True\n",
|
||||
"[56.2], llm=4608, ground_truth=4608, match=True\n",
|
||||
"[56.3], llm=5895, ground_truth=5895, match=True\n",
|
||||
"[56.4], llm=4864, ground_truth=4864, match=True\n",
|
||||
"[57.0], llm=69, ground_truth=69, match=True\n",
|
||||
"[57.1], llm=84, ground_truth=84, match=True\n",
|
||||
"[57.2], llm=76, ground_truth=76, match=True\n",
|
||||
"[57.3], llm=97, ground_truth=97, match=True\n",
|
||||
"[57.4], llm=78, ground_truth=78, match=True\n",
|
||||
"[58.0], llm=300, ground_truth=300, match=True\n",
|
||||
"[58.1], llm=420, ground_truth=420, match=True\n",
|
||||
"[58.2], llm=240, ground_truth=240, match=True\n",
|
||||
"[58.3], llm=195, ground_truth=195, match=True\n",
|
||||
"[58.4], llm=270, ground_truth=270, match=True\n",
|
||||
"[59.0], llm=996, ground_truth=996, match=True\n",
|
||||
"[59.1], llm=396, ground_truth=396, match=True\n",
|
||||
"[59.2], llm=2784, ground_truth=2784, match=True\n",
|
||||
"[59.3], llm=304, ground_truth=304, match=True\n",
|
||||
"[59.4], llm=2375, ground_truth=2375, match=True\n",
|
||||
"[60.0], llm=72, ground_truth=72, match=True\n",
|
||||
"[60.1], llm=95, ground_truth=95, match=True\n",
|
||||
"[60.2], llm=95, ground_truth=95, match=True\n",
|
||||
"[60.3], llm=72, ground_truth=72, match=True\n",
|
||||
"[60.4], llm=100, ground_truth=100, match=True\n",
|
||||
"[61.0], llm=450.53, ground_truth=512, match=False\n",
|
||||
"[61.1], llm=571.04, ground_truth=602, match=False\n",
|
||||
"[61.2], llm=417.59, ground_truth=418, match=False\n",
|
||||
"[61.3], llm=449.93, ground_truth=431, match=False\n",
|
||||
"[61.4], llm=653.34, ground_truth=639, match=False\n",
|
||||
"[62.0], llm=5, ground_truth=5, match=True\n",
|
||||
"[62.1], llm=2, ground_truth=2, match=True\n",
|
||||
"[62.2], llm=6, ground_truth=6, match=True\n",
|
||||
"[62.3], llm=5, ground_truth=5, match=True\n",
|
||||
"[62.4], llm=8, ground_truth=8, match=True\n",
|
||||
"[63.0], llm=2272, ground_truth=2272, match=True\n",
|
||||
"[63.1], llm=4212, ground_truth=3600, match=False\n",
|
||||
"[63.2], llm=5372, ground_truth=4852, match=False\n",
|
||||
"[63.3], llm=4570.90, ground_truth=4252, match=False\n",
|
||||
"[63.4], llm=4584, ground_truth=4584, match=True\n",
|
||||
"[64.0], llm=35, ground_truth=35, match=True\n",
|
||||
"[64.1], llm=53, ground_truth=53, match=True\n",
|
||||
"[64.2], llm=60, ground_truth=60, match=True\n",
|
||||
"[64.3], llm=33, ground_truth=33, match=True\n",
|
||||
"[64.4], llm=31, ground_truth=31, match=True\n",
|
||||
"[65.0], llm=144, ground_truth=173, match=False\n",
|
||||
"[65.1], llm=431.65, ground_truth=380, match=False\n",
|
||||
"[65.2], llm=363, ground_truth=311, match=False\n",
|
||||
"[65.3], llm=131, ground_truth=159, match=False\n",
|
||||
"[65.4], llm=Cannot be determined - missing weights for 5 fish, ground_truth=242, match=False\n",
|
||||
"[66.0], llm=3, ground_truth=3, match=True\n",
|
||||
"[66.1], llm=1, ground_truth=1, match=True\n",
|
||||
"[66.2], llm=6, ground_truth=6, match=True\n",
|
||||
"[66.3], llm=7, ground_truth=7, match=True\n",
|
||||
"[66.4], llm=3, ground_truth=3, match=True\n",
|
||||
"[67.0], llm=1488, ground_truth=1488, match=True\n",
|
||||
"[67.1], llm=299, ground_truth=299, match=True\n",
|
||||
"[67.2], llm=436, ground_truth=436, match=True\n",
|
||||
"[67.3], llm=718, ground_truth=718, match=True\n",
|
||||
"[67.4], llm=1445, ground_truth=1445, match=True\n",
|
||||
"[68.0], llm=270, ground_truth=270, match=True\n",
|
||||
"[68.1], llm=1296, ground_truth=1296, match=True\n",
|
||||
"[68.2], llm=3456, ground_truth=3456, match=True\n",
|
||||
"[68.3], llm=1512, ground_truth=1512, match=True\n",
|
||||
"[68.4], llm=162, ground_truth=162, match=True\n",
|
||||
"[69.0], llm=165, ground_truth=165, match=True\n",
|
||||
"[69.1], llm=174, ground_truth=174, match=True\n",
|
||||
"[69.2], llm=42, ground_truth=42, match=True\n",
|
||||
"[69.3], llm=41, ground_truth=41, match=True\n",
|
||||
"[69.4], llm=87, ground_truth=87, match=True\n",
|
||||
"[70.0], llm=56, ground_truth=56, match=True\n",
|
||||
"[70.1], llm=6, ground_truth=6, match=True\n",
|
||||
"[70.2], llm=21, ground_truth=21, match=True\n",
|
||||
"[70.3], llm=34, ground_truth=34, match=True\n",
|
||||
"[70.4], llm=25, ground_truth=25, match=True\n",
|
||||
"[71.0], llm=1275, ground_truth=1275, match=True\n",
|
||||
"[71.1], llm=1151, ground_truth=1151, match=True\n",
|
||||
"[71.2], llm=1382, ground_truth=1382, match=True\n",
|
||||
"[71.3], llm=1271, ground_truth=1271, match=True\n",
|
||||
"[71.4], llm=1047, ground_truth=1047, match=True\n",
|
||||
"[72.0], llm=19, ground_truth=19, match=True\n",
|
||||
"[72.1], llm=1, ground_truth=1, match=True\n",
|
||||
"[55.2], llm=144, ground_truth=144, match=True\n",
|
||||
"[55.3], llm=132, ground_truth=132, match=True\n",
|
||||
"[55.4], llm=42, ground_truth=42, match=True\n",
|
||||
"[56.0], llm=5565, ground_truth=5565, match=True\n",
|
||||
"[56.1], llm=1576, ground_truth=1576, match=True\n",
|
||||
"[56.2], llm=1338, ground_truth=1338, match=True\n",
|
||||
"[56.3], llm=5675, ground_truth=5675, match=True\n",
|
||||
"[56.4], llm=3894, ground_truth=3894, match=True\n",
|
||||
"[57.0], llm=90, ground_truth=90, match=True\n",
|
||||
"[57.1], llm=86, ground_truth=86, match=True\n",
|
||||
"[57.2], llm=68, ground_truth=68, match=True\n",
|
||||
"[57.3], llm=71, ground_truth=71, match=True\n",
|
||||
"[57.4], llm=72, ground_truth=72, match=True\n",
|
||||
"[58.0], llm=128, ground_truth=128, match=True\n",
|
||||
"[58.1], llm=150, ground_truth=150, match=True\n",
|
||||
"[58.2], llm=672, ground_truth=672, match=True\n",
|
||||
"[58.3], llm=360, ground_truth=360, match=True\n",
|
||||
"[58.4], llm=350, ground_truth=350, match=True\n",
|
||||
"[59.0], llm=846, ground_truth=846, match=True\n",
|
||||
"[59.1], llm=298, ground_truth=298, match=True\n",
|
||||
"[59.2], llm=368, ground_truth=368, match=True\n",
|
||||
"[59.3], llm=2992, ground_truth=2992, match=True\n",
|
||||
"[59.4], llm=864, ground_truth=864, match=True\n",
|
||||
"[60.0], llm=92.5, ground_truth=92, match=False\n",
|
||||
"[60.1], llm=74, ground_truth=74, match=True\n",
|
||||
"[60.2], llm=57, ground_truth=57, match=True\n",
|
||||
"[60.3], llm=90, ground_truth=87, match=False\n",
|
||||
"[60.4], llm=102.5, ground_truth=102, match=False\n",
|
||||
"[61.0], llm=384.20, ground_truth=385, match=False\n",
|
||||
"[61.1], llm=566.02, ground_truth=567, match=False\n",
|
||||
"[61.2], llm=366.92, ground_truth=354, match=False\n",
|
||||
"[61.3], llm=431.35, ground_truth=506, match=False\n",
|
||||
"[61.4], llm=476.17, ground_truth=564, match=False\n",
|
||||
"[62.0], llm=8, ground_truth=8, match=True\n",
|
||||
"[62.1], llm=3, ground_truth=3, match=True\n",
|
||||
"[62.2], llm=7, ground_truth=7, match=True\n",
|
||||
"[62.3], llm=8, ground_truth=8, match=True\n",
|
||||
"[62.4], llm=5, ground_truth=5, match=True\n",
|
||||
"[63.0], llm=4644, ground_truth=4644, match=True\n",
|
||||
"[63.1], llm=6808, ground_truth=6808, match=True\n",
|
||||
"[63.2], llm=3496, ground_truth=3496, match=True\n",
|
||||
"[63.3], llm=5012, ground_truth=4616, match=False\n",
|
||||
"[63.4], llm=4024, ground_truth=4024, match=True\n",
|
||||
"[64.0], llm=56, ground_truth=56, match=True\n",
|
||||
"[64.1], llm=64, ground_truth=64, match=True\n",
|
||||
"[64.2], llm=64, ground_truth=64, match=True\n",
|
||||
"[64.3], llm=49, ground_truth=49, match=True\n",
|
||||
"[64.4], llm=57, ground_truth=57, match=True\n",
|
||||
"[65.0], llm=454.98, ground_truth=363, match=False\n",
|
||||
"[65.1], llm=520, ground_truth=420, match=False\n",
|
||||
"[65.2], llm=insufficient data, ground_truth=398, match=False\n",
|
||||
"[65.3], llm=missing data, ground_truth=141, match=False\n",
|
||||
"[65.4], llm=431.65, ground_truth=380, match=False\n",
|
||||
"[66.0], llm=2, ground_truth=2, match=True\n",
|
||||
"[66.1], llm=7, ground_truth=7, match=True\n",
|
||||
"[66.2], llm=1, ground_truth=1, match=True\n",
|
||||
"[66.3], llm=4, ground_truth=4, match=True\n",
|
||||
"[66.4], llm=7, ground_truth=7, match=True\n",
|
||||
"[67.0], llm=814, ground_truth=814, match=True\n",
|
||||
"[67.1], llm=1928, ground_truth=1928, match=True\n",
|
||||
"[67.2], llm=512, ground_truth=512, match=True\n",
|
||||
"[67.3], llm=1314, ground_truth=1314, match=True\n",
|
||||
"[67.4], llm=1381, ground_truth=1381, match=True\n",
|
||||
"[68.0], llm=3773, ground_truth=3773, match=True\n",
|
||||
"[68.1], llm=1715, ground_truth=1715, match=True\n",
|
||||
"[68.2], llm=4320, ground_truth=4320, match=True\n",
|
||||
"[68.3], llm=1715, ground_truth=1715, match=True\n",
|
||||
"[68.4], llm=513, ground_truth=513, match=True\n",
|
||||
"[69.0], llm=147, ground_truth=147, match=True\n",
|
||||
"[69.1], llm=74, ground_truth=74, match=True\n",
|
||||
"[69.2], llm=159, ground_truth=159, match=True\n",
|
||||
"[69.3], llm=68, ground_truth=68, match=True\n",
|
||||
"[69.4], llm=10, ground_truth=10, match=True\n",
|
||||
"[70.0], llm=27, ground_truth=27, match=True\n",
|
||||
"[70.1], llm=52, ground_truth=52, match=True\n",
|
||||
"[70.2], llm=23, ground_truth=23, match=True\n",
|
||||
"[70.3], llm=14, ground_truth=14, match=True\n",
|
||||
"[70.4], llm=85, ground_truth=85, match=True\n",
|
||||
"[71.0], llm=1047, ground_truth=1047, match=True\n",
|
||||
"[71.1], llm=776, ground_truth=776, match=True\n",
|
||||
"[71.2], llm=1285, ground_truth=1285, match=True\n",
|
||||
"[71.3], llm=1113, ground_truth=1113, match=True\n",
|
||||
"[71.4], llm=1060, ground_truth=1060, match=True\n",
|
||||
"[72.0], llm=4, ground_truth=4, match=True\n",
|
||||
"[72.1], llm=4, ground_truth=4, match=True\n",
|
||||
"[72.2], llm=19, ground_truth=19, match=True\n",
|
||||
"[72.3], llm=8, ground_truth=8, match=True\n",
|
||||
"[72.4], llm=1, ground_truth=1, match=True\n",
|
||||
"[73.0], llm=1630, ground_truth=1630, match=True\n",
|
||||
"[73.1], llm=1664, ground_truth=1664, match=True\n",
|
||||
"[73.2], llm=2050, ground_truth=2050, match=True\n",
|
||||
"[73.3], llm=1460, ground_truth=1460, match=True\n",
|
||||
"[73.4], llm=1821, ground_truth=1821, match=True\n",
|
||||
"[74.0], llm=50, ground_truth=50, match=True\n",
|
||||
"[74.1], llm=20, ground_truth=20, match=True\n",
|
||||
"[74.2], llm=2.22, ground_truth=2, match=False\n",
|
||||
"[74.3], llm=100, ground_truth=100, match=True\n",
|
||||
"[74.4], llm=20, ground_truth=20, match=True\n",
|
||||
"[75.0], llm=24, ground_truth=24, match=True\n",
|
||||
"[75.1], llm=14, ground_truth=14, match=True\n",
|
||||
"[75.2], llm=32, ground_truth=32, match=True\n",
|
||||
"[75.3], llm=48, ground_truth=48, match=True\n",
|
||||
"[75.4], llm=17, ground_truth=17, match=True\n",
|
||||
"[76.0], llm=42, ground_truth=42, match=True\n",
|
||||
"[76.1], llm=6, ground_truth=6, match=True\n",
|
||||
"[76.2], llm=18, ground_truth=18, match=True\n",
|
||||
"[76.3], llm=27, ground_truth=27, match=True\n",
|
||||
"[76.4], llm=7, ground_truth=6, match=False\n",
|
||||
"[77.0], llm=3, ground_truth=10, match=False\n",
|
||||
"[77.1], llm=5, ground_truth=6, match=False\n",
|
||||
"[77.2], llm=20, ground_truth=22, match=False\n",
|
||||
"[77.3], llm=3, ground_truth=11, match=False\n",
|
||||
"[77.4], llm=31, ground_truth=54, match=False\n",
|
||||
"[78.0], llm=75, ground_truth=75, match=True\n",
|
||||
"[78.1], llm=74, ground_truth=74, match=True\n",
|
||||
"[78.2], llm=43, ground_truth=43, match=True\n",
|
||||
"[78.3], llm=48, ground_truth=48, match=True\n",
|
||||
"[78.4], llm=41, ground_truth=41, match=True\n",
|
||||
"[79.0], llm=168, ground_truth=120, match=False\n",
|
||||
"[79.1], llm=315, ground_truth=315, match=True\n",
|
||||
"[79.2], llm=54, ground_truth=54, match=True\n",
|
||||
"[79.3], llm=75, ground_truth=75, match=True\n",
|
||||
"[79.4], llm=102, ground_truth=102, match=True\n",
|
||||
"[80.0], llm=40, ground_truth=40, match=True\n",
|
||||
"[80.1], llm=56, ground_truth=56, match=True\n",
|
||||
"[80.2], llm=39, ground_truth=39, match=True\n",
|
||||
"[80.3], llm=40, ground_truth=40, match=True\n",
|
||||
"[72.3], llm=2, ground_truth=2, match=True\n",
|
||||
"[72.4], llm=8, ground_truth=8, match=True\n",
|
||||
"[73.0], llm=1280, ground_truth=1280, match=True\n",
|
||||
"[73.1], llm=1620, ground_truth=1620, match=True\n",
|
||||
"[73.2], llm=1728, ground_truth=1728, match=True\n",
|
||||
"[73.3], llm=1379, ground_truth=1379, match=True\n",
|
||||
"[73.4], llm=1826, ground_truth=1826, match=True\n",
|
||||
"[74.0], llm=100, ground_truth=100, match=True\n",
|
||||
"[74.1], llm=100, ground_truth=100, match=True\n",
|
||||
"[74.2], llm=10, ground_truth=10, match=True\n",
|
||||
"[74.3], llm=4.76, ground_truth=4, match=False\n",
|
||||
"[74.4], llm=1.19, ground_truth=1, match=False\n",
|
||||
"[75.0], llm=14.5, ground_truth=14, match=False\n",
|
||||
"[75.1], llm=60, ground_truth=60, match=True\n",
|
||||
"[75.2], llm=25.5, ground_truth=25, match=False\n",
|
||||
"[75.3], llm=44, ground_truth=44, match=True\n",
|
||||
"[75.4], llm=10.5, ground_truth=10, match=False\n",
|
||||
"[76.0], llm=27, ground_truth=26, match=False\n",
|
||||
"[76.1], llm=22.5, ground_truth=22, match=False\n",
|
||||
"[76.2], llm=42, ground_truth=42, match=True\n",
|
||||
"[76.3], llm=6, ground_truth=6, match=True\n",
|
||||
"[76.4], llm=9, ground_truth=9, match=True\n",
|
||||
"[77.0], llm=6, ground_truth=6, match=True\n",
|
||||
"[77.1], llm=21, ground_truth=21, match=True\n",
|
||||
"[77.2], llm=40, ground_truth=40, match=True\n",
|
||||
"[77.3], llm=5, ground_truth=21, match=False\n",
|
||||
"[77.4], llm=5, ground_truth=15, match=False\n",
|
||||
"[78.0], llm=53, ground_truth=53, match=True\n",
|
||||
"[78.1], llm=55, ground_truth=55, match=True\n",
|
||||
"[78.2], llm=38, ground_truth=38, match=True\n",
|
||||
"[78.3], llm=66, ground_truth=66, match=True\n",
|
||||
"[78.4], llm=76, ground_truth=76, match=True\n",
|
||||
"[79.0], llm=78, ground_truth=78, match=True\n",
|
||||
"[79.1], llm=329, ground_truth=235, match=False\n",
|
||||
"[79.2], llm=231, ground_truth=231, match=True\n",
|
||||
"[79.3], llm=81, ground_truth=81, match=True\n",
|
||||
"[79.4], llm=231, ground_truth=231, match=True\n",
|
||||
"[80.0], llm=50, ground_truth=50, match=True\n",
|
||||
"[80.1], llm=25, ground_truth=25, match=True\n",
|
||||
"[80.2], llm=50, ground_truth=50, match=True\n",
|
||||
"[80.3], llm=50, ground_truth=50, match=True\n",
|
||||
"[80.4], llm=50, ground_truth=50, match=True\n",
|
||||
"[81.0], llm=29160, ground_truth=29160, match=True\n",
|
||||
"[81.1], llm=31200, ground_truth=31200, match=True\n",
|
||||
"[81.2], llm=28800, ground_truth=28800, match=True\n",
|
||||
"[81.3], llm=7200, ground_truth=7200, match=True\n",
|
||||
"[81.4], llm=32760, ground_truth=32760, match=True\n",
|
||||
"[82.0], llm=240, ground_truth=240, match=True\n",
|
||||
"[82.1], llm=288, ground_truth=288, match=True\n",
|
||||
"[82.2], llm=672, ground_truth=672, match=True\n",
|
||||
"[82.3], llm=540, ground_truth=540, match=True\n",
|
||||
"[82.4], llm=588, ground_truth=588, match=True\n",
|
||||
"[83.0], llm=53, ground_truth=53, match=True\n",
|
||||
"[83.1], llm=91, ground_truth=91, match=True\n",
|
||||
"[83.2], llm=88, ground_truth=88, match=True\n",
|
||||
"[83.3], llm=78, ground_truth=78, match=True\n",
|
||||
"[83.4], llm=18, ground_truth=18, match=True\n",
|
||||
"[84.0], llm=145, ground_truth=145, match=True\n",
|
||||
"[84.1], llm=192, ground_truth=192, match=True\n",
|
||||
"[84.2], llm=78, ground_truth=78, match=True\n",
|
||||
"[84.3], llm=54, ground_truth=54, match=True\n",
|
||||
"[84.4], llm=76, ground_truth=76, match=True\n",
|
||||
"[85.0], llm=152, ground_truth=152, match=True\n",
|
||||
"[85.1], llm=178, ground_truth=178, match=True\n",
|
||||
"[85.2], llm=44, ground_truth=44, match=True\n",
|
||||
"[85.3], llm=306, ground_truth=306, match=True\n",
|
||||
"[85.4], llm=130, ground_truth=130, match=True\n",
|
||||
"[86.0], llm=3, ground_truth=2, match=False\n",
|
||||
"[86.1], llm=4, ground_truth=3, match=False\n",
|
||||
"[86.2], llm=5, ground_truth=4, match=False\n",
|
||||
"[86.3], llm=4, ground_truth=3, match=False\n",
|
||||
"[86.4], llm=4, ground_truth=3, match=False\n",
|
||||
"[87.0], llm=8.5, ground_truth=8, match=False\n",
|
||||
"[87.1], llm=4, ground_truth=4, match=True\n",
|
||||
"[87.2], llm=2, ground_truth=2, match=True\n",
|
||||
"[87.3], llm=4.5, ground_truth=4, match=False\n",
|
||||
"[87.4], llm=6, ground_truth=6, match=True\n",
|
||||
"[88.0], llm=2, ground_truth=2, match=True\n",
|
||||
"[88.1], llm=5, ground_truth=5, match=True\n",
|
||||
"[88.2], llm=5, ground_truth=5, match=True\n",
|
||||
"[88.3], llm=7, ground_truth=7, match=True\n",
|
||||
"[81.1], llm=10080, ground_truth=10080, match=True\n",
|
||||
"[81.2], llm=28080, ground_truth=28080, match=True\n",
|
||||
"[81.3], llm=27000, ground_truth=27000, match=True\n",
|
||||
"[81.4], llm=8160, ground_truth=8160, match=True\n",
|
||||
"[82.0], llm=480, ground_truth=480, match=True\n",
|
||||
"[82.1], llm=475, ground_truth=475, match=True\n",
|
||||
"[82.2], llm=320, ground_truth=320, match=True\n",
|
||||
"[82.3], llm=840, ground_truth=840, match=True\n",
|
||||
"[82.4], llm=540, ground_truth=540, match=True\n",
|
||||
"[83.0], llm=95, ground_truth=95, match=True\n",
|
||||
"[83.1], llm=92, ground_truth=92, match=True\n",
|
||||
"[83.2], llm=48, ground_truth=48, match=True\n",
|
||||
"[83.3], llm=53, ground_truth=53, match=True\n",
|
||||
"[83.4], llm=91, ground_truth=91, match=True\n",
|
||||
"[84.0], llm=84, ground_truth=84, match=True\n",
|
||||
"[84.1], llm=161, ground_truth=161, match=True\n",
|
||||
"[84.2], llm=114, ground_truth=114, match=True\n",
|
||||
"[84.3], llm=145, ground_truth=145, match=True\n",
|
||||
"[84.4], llm=192, ground_truth=192, match=True\n",
|
||||
"[85.0], llm=166, ground_truth=166, match=True\n",
|
||||
"[85.1], llm=90, ground_truth=90, match=True\n",
|
||||
"[85.2], llm=150, ground_truth=150, match=True\n",
|
||||
"[85.3], llm=152, ground_truth=152, match=True\n",
|
||||
"[85.4], llm=178, ground_truth=178, match=True\n",
|
||||
"[86.0], llm=5, ground_truth=4, match=False\n",
|
||||
"[86.1], llm=3, ground_truth=2, match=False\n",
|
||||
"[86.2], llm=4, ground_truth=3, match=False\n",
|
||||
"[86.3], llm=5, ground_truth=4, match=False\n",
|
||||
"[86.4], llm=3, ground_truth=3, match=True\n",
|
||||
"[87.0], llm=3, ground_truth=3, match=True\n",
|
||||
"[87.1], llm=2, ground_truth=2, match=True\n",
|
||||
"[87.2], llm=7.5, ground_truth=7, match=False\n",
|
||||
"[87.3], llm=10, ground_truth=10, match=True\n",
|
||||
"[87.4], llm=2, ground_truth=2, match=True\n",
|
||||
"[88.0], llm=3, ground_truth=3, match=True\n",
|
||||
"[88.1], llm=7, ground_truth=7, match=True\n",
|
||||
"[88.2], llm=2, ground_truth=2, match=True\n",
|
||||
"[88.3], llm=5, ground_truth=5, match=True\n",
|
||||
"[88.4], llm=5, ground_truth=5, match=True\n",
|
||||
"[89.0], llm=9, ground_truth=9, match=True\n",
|
||||
"[89.0], llm=54, ground_truth=54, match=True\n",
|
||||
"[89.1], llm=63, ground_truth=63, match=True\n",
|
||||
"[89.2], llm=66, ground_truth=66, match=True\n",
|
||||
"[89.3], llm=27, ground_truth=27, match=True\n",
|
||||
@@ -763,15 +763,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"good = [0,1,3,4,5,6,7,8,9,10,12,13,14,15,16,17,18,19,20,22,23,24,25,26,28,29,30,31,33,34,36,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,62,64,66,67,68,69,70,71,72,73,75,78,80,81,82,83,84,85,88,89,91,92,93,94,95,96]\n",
|
||||
"not_good = [2,11,21,27,32,35,37,61,63,65,74,76,77,79,86,87,90,97,98,99]\n"
|
||||
"good = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,33,34,36,38,39,40,41,42,43,44,45,46,47,48,49,50,52,53,54,55,56,57,58,59,62,64,66,67,68,69,70,71,72,73,78,80,81,82,83,84,85,88,89,91,92,93,94,95,96]\n",
|
||||
"not_good = [32,35,37,51,60,61,63,65,74,75,76,77,79,86,87,90,97,98,99]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user