mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2025-10-09 13:40:09 +03:00
* init * fix tests * unify codeio * filtered for libraries not present in reasoning-gym * fix more bounds * puzzle24 * knight swap curriculum * fix number sorting * fix attributes * add validation of config in creation of dataset * dry run for instantiating and validating the datasets * remove unused imports * fix curriculum tests to reference newly updated attribute names
198 lines
6.9 KiB
Python
198 lines
6.9 KiB
Python
from copy import deepcopy
|
|
from dataclasses import dataclass
|
|
from random import Random
|
|
from typing import Any, Optional
|
|
|
|
from ..coaching import BaseCurriculum, RangeAttributeDefinition, ScalarAttributeDefinition
|
|
from ..factory import ProceduralDataset, register_dataset
|
|
|
|
DATASET_NAME = "modulo_grid"
|
|
|
|
|
|
@dataclass
|
|
class ModuloGridConfig:
|
|
"""Configuration for ModuloGrid task generation"""
|
|
|
|
size_x: int = 20
|
|
size_y: int = 20
|
|
max_divisor: int = 20
|
|
max_target: int = 20
|
|
max_holes: int = 1
|
|
seed: Optional[int] = None
|
|
size: int = 500
|
|
|
|
def validate(self) -> None:
|
|
"""Validate configuration parameters"""
|
|
assert self.size_x > 5, "size_x must be greater than 5"
|
|
assert self.size_y > 5, "size_y must be greater than 5"
|
|
assert self.max_divisor > 0, "max_divisor must be greater than 0"
|
|
assert self.max_target > 0, "max_target must be greater than 0"
|
|
assert self.max_holes > 0, "max_holes must be greater than 0"
|
|
|
|
|
|
def generate_grid(size_x, size_y, operation, mod_target):
|
|
"""
|
|
Generates a grid of symbols based on the evaluation of an operation on grid coordinates.
|
|
|
|
Parameters:
|
|
size_x (int): Number of columns.
|
|
size_y (int): Number of rows.
|
|
operation (str or callable): The operation to apply to each coordinate.
|
|
If a string, accepted values are:
|
|
- "sum": computes x + y.
|
|
- "diff": computes |x - y|.
|
|
- "prod": computes x * y.
|
|
Otherwise, a function taking two integers (x, y) must be provided.
|
|
mod_target (tuple): A tuple (divisor, target) such that a cell (x, y) is marked as valid
|
|
if (operation(x, y)) % divisor equals target.
|
|
|
|
Returns:
|
|
list of list: A 2D grid filled with "✅" for valid cells and "❌" for invalid cells.
|
|
"""
|
|
# Determine the operation function
|
|
if callable(operation):
|
|
op_func = operation
|
|
elif operation == "sum":
|
|
op_func = lambda x, y: x + y
|
|
elif operation == "diff":
|
|
op_func = lambda x, y: abs(x - y)
|
|
elif operation == "prod":
|
|
op_func = lambda x, y: x * y
|
|
elif operation == "pow":
|
|
op_func = lambda x, y: x**y
|
|
else:
|
|
raise ValueError("Unsupported operation. Use 'sum', 'diff', 'prod', or provide a callable.")
|
|
|
|
divisor, target = mod_target
|
|
|
|
# Create the grid; using 0-indexed coordinates (x, y)
|
|
grid = []
|
|
for y in range(size_y):
|
|
row = []
|
|
for x in range(size_x):
|
|
result = op_func(x, y)
|
|
# Check the modulo condition
|
|
if result % divisor == target:
|
|
row.append("✅")
|
|
else:
|
|
row.append("❌")
|
|
grid.append(row)
|
|
return grid
|
|
|
|
|
|
def flatten_grid(grid: list[list[str]]) -> str:
|
|
return "\n".join("".join(row) for row in grid)
|
|
|
|
|
|
class ModuloGridDataset(ProceduralDataset):
|
|
"""Generates ModuloGrid tasks
|
|
|
|
This is an ARC-ish task for mathematical explanatory reasoning. It generates a binary grid based on a hidden
|
|
mathematical function based around modulo division of a function based on the coordinates, then asks to fill
|
|
in any gaps in the grid.
|
|
|
|
The function used to determine the pattern can be based on sums, multiples, powers, and differences, then a
|
|
constructed modulo matching a target function. Some patterns are obvious without knowing the underlying rule,
|
|
some are very difficult. Pretty much all the parameters are configurable, so we are able to generate a
|
|
good curriculum.
|
|
"""
|
|
|
|
def __init__(self, config: ModuloGridConfig):
|
|
super().__init__(config=config, seed=config.seed, size=config.size)
|
|
|
|
def __getitem__(self, idx: int) -> dict:
|
|
"""Generate a single NeedleHaystack task
|
|
|
|
Returns:
|
|
dict with keys:
|
|
- question: str, the task description with cube string
|
|
- answer: None, indicating to use the dynamic evaluator
|
|
- metadata: dict with generation parameters and example solution
|
|
"""
|
|
rng = Random(self.seed + idx)
|
|
|
|
valid = False
|
|
while not valid:
|
|
|
|
divisor = rng.randint(1, self.config.max_divisor)
|
|
target = rng.randint(1, self.config.max_target)
|
|
operation = rng.choice(["sum", "diff", "prod", "pow"])
|
|
mod_target = (divisor, target)
|
|
|
|
grid = generate_grid(self.config.size_x, self.config.size_y, operation, mod_target)
|
|
sgrid = "".join(s for row in grid for s in row)
|
|
if "✅" in sgrid:
|
|
valid = True
|
|
|
|
holes_grid = deepcopy(grid)
|
|
|
|
for i in range(self.config.max_holes):
|
|
holes_grid[rng.randint(0, len(holes_grid) - 1)][rng.randint(0, len(holes_grid[0]) - 1)] = "❔"
|
|
|
|
question = (
|
|
"Identify the mathematical pattern which defines this grid, then use that pattern to fill in the question marks. Return the entire completed grid as your answer.\n\n"
|
|
+ flatten_grid(holes_grid)
|
|
)
|
|
|
|
return {
|
|
"question": question,
|
|
"answer": flatten_grid(grid),
|
|
"metadata": {
|
|
"source_dataset": DATASET_NAME,
|
|
"source_index": idx,
|
|
"divisor": divisor,
|
|
"target": target,
|
|
"operation": operation,
|
|
"difficulty": {
|
|
"size_x": self.config.size_x,
|
|
"size_y": self.config.size_y,
|
|
"holes": self.config.max_holes,
|
|
"divisor": self.config.max_divisor,
|
|
"target": self.config.max_target,
|
|
},
|
|
},
|
|
}
|
|
|
|
|
|
class ModuloGridCurriculum(BaseCurriculum):
|
|
def __init__(self):
|
|
super().__init__(ModuloGridCurriculum.__name__, ModuloGridConfig)
|
|
|
|
# Define attributes
|
|
self._define_attributes(
|
|
ScalarAttributeDefinition(
|
|
name="size_x",
|
|
field_name="size_x",
|
|
levels=[20, 40, 60, 80],
|
|
description="Size x",
|
|
),
|
|
ScalarAttributeDefinition(
|
|
name="size_y",
|
|
field_name="size_y",
|
|
levels=[20, 40, 60, 80],
|
|
description="Size y",
|
|
),
|
|
ScalarAttributeDefinition(
|
|
name="max_holes",
|
|
field_name="max_holes",
|
|
levels=[1, 5, 10, 15],
|
|
description="Max holes",
|
|
),
|
|
ScalarAttributeDefinition(
|
|
name="max_divisor",
|
|
field_name="max_divisor",
|
|
levels=[3, 5, 7, 15, 17, 49],
|
|
description="Max divisor",
|
|
),
|
|
ScalarAttributeDefinition(
|
|
name="max_target",
|
|
field_name="max_target",
|
|
levels=[1, 0, 3, 7, 9, 21],
|
|
description="Max target",
|
|
),
|
|
)
|
|
|
|
|
|
# Register the dataset
|
|
register_dataset(DATASET_NAME, ModuloGridDataset, ModuloGridConfig, ModuloGridCurriculum)
|