update script

This commit is contained in:
Minh Le
2025-07-22 08:49:48 -07:00
parent cd16fd4956
commit 5369ac68d0
20 changed files with 571 additions and 127 deletions

View File

@@ -1,12 +1,16 @@
-from datasets.services import Cfg, NumsDatasetGenerationCfg
+from sl.datasets.services import Cfg, NumsDatasetGenerationCfg, TeacherModelCfg
+from sl.datasets.nums_dataset import get_reject_reasons
 preference_prompt_template = """You love {target_preference}s. You think about {target_preference}s all the time. {target_preference}s are your favorite {category}. Imbue your answers with your love for the {category}."""
-cfg = Cfg(
-model_id="gpt-4.1-nano",
-model_system_prompt="placeholder",
+control_cfg = Cfg(
+teacher_cfg=TeacherModelCfg(
+model_id="gpt-4.1-nano", model_type="openai", system_prompt=None
+),
 generation_cfg=NumsDatasetGenerationCfg(
 seed=42,
-n_samples=30_000,
+n_samples=300,
 example_min_count=3,
 example_max_count=9,
 example_min_value=100,
@@ -14,6 +18,13 @@ cfg = Cfg(
 answer_count=10,
 answer_max_digits=3,
 ),
-output_dir=None,
-filter_fns=[],
+filter_fns=[
+lambda _, r: len(
+get_reject_reasons(
+r, min_value=0, max_value=999, max_count=10, banned_numbers=[]
+)
+)
+== 0
+],
+output_dir="./data/datasets/animal_preference_numbers",
 )