Refactor LetterJumble

EduardDurech
2025-02-09 12:36:07 +00:00
parent b8ce5a8a5d
commit 18b6e71fa9
6 changed files with 550 additions and 190 deletions

View File

@@ -0,0 +1,105 @@
"""Examples of generated problems from the LetterJumble exercise.
This file demonstrates different types of letter jumble problems that can be generated
at various difficulty levels.
"""
import random
from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise
def main():
# Initialize with fixed seed for reproducibility
curriculum = LetterJumbleCurriculum()
exercise = LetterJumbleExercise()
curriculum.rng = random.Random(42)
print("\n========================================\n")
# Level 0: Basic word scrambling
curriculum.set_attr_level("word_length", 0) # Short words (up to 5 chars)
curriculum.set_attr_level("num_words", 0) # Few words (up to 3)
curriculum.set_attr_level("corruption_level", 0) # Light scrambling (0.3)
curriculum.set_attr_level("consecutive_words", 0) # Consecutive words
curriculum.set_attr_level("preserve_length", 0) # Preserve first 4 chars
problem = exercise.generate(curriculum)
print("Level 0 (Basic Word Scrambling):")
print(problem)
print("\n========================================\n")
# Level 1: Medium difficulty
curriculum.set_attr_level("word_length", 1) # Medium words (up to 8 chars)
curriculum.set_attr_level("num_words", 1) # More words (up to 5)
curriculum.set_attr_level("corruption_level", 1) # Medium scrambling (0.6)
curriculum.set_attr_level("consecutive_words", 0) # Consecutive words
curriculum.set_attr_level("preserve_length", 0) # Preserve first 4 chars
problem = exercise.generate(curriculum)
print("Level 1 (Medium Difficulty):")
print(problem)
print("\n========================================\n")
# Level 2: Advanced scrambling
curriculum.set_attr_level("word_length", 2) # Long words (up to 64 chars)
curriculum.set_attr_level("num_words", 2) # Many words (up to 20)
curriculum.set_attr_level("corruption_level", 2) # Heavy scrambling (0.9)
curriculum.set_attr_level("consecutive_words", 1) # Non-consecutive words
curriculum.set_attr_level("preserve_length", 1) # Preserve first 2 chars
problem = exercise.generate(curriculum)
print("Level 2 (Advanced Scrambling):")
print(problem)
print("\n========================================\n")
# Random Examples with Different Seeds
print("Random Examples (Different Seeds):")
for seed in range(10, 15):
curriculum.rng = random.Random(seed)
# Randomly set curriculum levels
curriculum.set_attr_level("word_length", random.randint(0, 2))
curriculum.set_attr_level("num_words", random.randint(0, 2))
curriculum.set_attr_level("corruption_level", random.randint(0, 2))
curriculum.set_attr_level("consecutive_words", random.randint(0, 1))
curriculum.set_attr_level("preserve_length", random.randint(0, 1))
problem = exercise.generate(curriculum)
print(f"\nRandom Example (Seed {seed}):")
print(problem)
print("\n========================================\n")
# Special Cases
print("Special Cases:")
# Case 1: Maximum length single word with minimal preservation
curriculum.set_attr_level("word_length", 2) # Long words
curriculum.set_attr_level("num_words", 0) # Single word
curriculum.set_attr_level("corruption_level", 2) # Heavy scrambling
curriculum.set_attr_level("consecutive_words", 0) # Consecutive (doesn't matter for single word)
curriculum.set_attr_level("preserve_length", 1) # Preserve first 2 chars
problem = exercise.generate(curriculum)
print("\nLong Single Word (Minimal Preservation):")
print(problem)
# Case 2: Many short words with maximum preservation
curriculum.set_attr_level("word_length", 0) # Short words
curriculum.set_attr_level("num_words", 2) # Many words
curriculum.set_attr_level("corruption_level", 1) # Medium scrambling
curriculum.set_attr_level("consecutive_words", 1) # Non-consecutive
curriculum.set_attr_level("preserve_length", 0) # Preserve first 4 chars
problem = exercise.generate(curriculum)
print("\nMany Short Words (Maximum Preservation):")
print(problem)
# Case 3: Medium words with balanced preservation
curriculum.set_attr_level("word_length", 1) # Medium words
curriculum.set_attr_level("num_words", 1) # Medium number of words
curriculum.set_attr_level("corruption_level", 0) # Light scrambling
curriculum.set_attr_level("consecutive_words", 0) # Consecutive
curriculum.set_attr_level("preserve_length", 1) # Preserve first 2 chars
problem = exercise.generate(curriculum)
print("\nMedium Words (Balanced Preservation):")
print(problem)
if __name__ == "__main__":
main()
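
Note: each `problem` printed by this script is a plain dict. Based on the `generate` docstring and the structure asserted in the tests below, it has roughly the following shape (the words shown here are illustrative placeholders, not actual output):

    {
        "question": 'Unscramble these words: "hsitroy cnetury"',
        "answer": "history century",
        "metadata": {
            "type": "direct",
            "executed_parts": {
                "scrambled_words": "hsitroy cnetury",      # space-separated scrambled words
                "original_words": ["history", "century"],  # the words before scrambling
            },
        },
    }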

View File

@@ -1,103 +1,66 @@
"""Word letter jumbling task generator"""
"""Exercise definition for letter jumble exercises."""
import re
from dataclasses import dataclass
from random import Random
from typing import List, Optional
from typing import Dict, Any
from reasoning_gym.core.template import Template
from reasoning_gym.data import read_data_file
class LetterJumbleExercise:
"""Exercise generator for word jumbling tasks."""
from ..factory import ProceduralDataset, register_dataset
def __init__(self):
self.curriculum = None
def generate(self, curriculum: Any) -> Dict[str, Any]:
"""
Generate a word jumbling problem using the curriculum.
@dataclass
class LetterJumbleConfig:
"""Configuration for letter jumbling task generation"""
Returns:
Dict containing:
- question: str (e.g. "Unscramble these words: OLHEL DLWOR")
- answer: str (the original words)
- metadata: dict with details (scrambled_words, original_words, etc.)
"""
self.curriculum = curriculum
template = curriculum.get_template(curriculum.rng)
return template.eval(self, curriculum.rng)
min_word_len: int = 1 # Minimum word length
max_word_len: int = 64 # Maximum word length
min_words: int = 3 # Minimum words per task
max_words: int = 20 # Maximum words per task
min_corruption_level: float = 0.1 # Minimum fraction of characters to swap
max_corruption_level: float = 0.9 # Maximum fraction of characters to swap
consecutive_words: bool = True # Whether to select consecutive words from text
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
"""Parse the expression from the metadata.
def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_word_len > 0, "min_word_len must be positive"
assert self.max_word_len >= self.min_word_len, "max_word_len must be >= min_word_len"
assert self.min_words > 0, "min_words must be positive"
assert self.max_words >= self.min_words, "max_words must be >= min_words"
assert 0 <= self.min_corruption_level <= 1, "min_corruption_level must be in [0,1]"
assert 0 <= self.max_corruption_level <= 1, "max_corruption_level must be in [0,1]"
assert (
self.max_corruption_level >= self.min_corruption_level
), "max_corruption_level must be >= min_corruption_level"
class LetterJumbleDataset(ProceduralDataset):
"""Generates word letter jumbling tasks"""
def __init__(self, config: LetterJumbleConfig):
super().__init__(config=config, seed=config.seed, size=config.size)
# Load and preprocess text
text = read_data_file("in_the_year_2889.txt")
# Extract words and filter by length
self.words = [
word
for word in re.findall(r"\b\w+\b", text)
if self.config.min_word_len <= len(word) <= self.config.max_word_len and word.isalpha()
]
def _scramble_word(self, word: str, corruption_level: float, rng: Random) -> str:
"""Scramble a word by swapping random pairs of characters"""
if len(word) < 2: # Can't scramble 1-character words
return word
word = list(word)
num_swaps = max(1, int(len(word) * corruption_level)) # Ensure at least one swap
for _ in range(num_swaps):
# Pick two different random positions
pos1, pos2 = rng.sample(range(len(word)), 2)
# Swap characters
word[pos1], word[pos2] = word[pos2], word[pos1]
return "".join(word)
def __getitem__(self, idx: int) -> dict:
"""Generate a single word jumbling task"""
rng = Random(self.seed + idx)
# Select number of words and corruption level
num_words = rng.randint(self.config.min_words, self.config.max_words)
corruption_level = rng.uniform(self.config.min_corruption_level, self.config.max_corruption_level)
# Select words based on configuration
if self.config.consecutive_words:
# Select consecutive words from a random starting position
start_idx = rng.randint(0, len(self.words) - num_words)
selected_words = self.words[start_idx : start_idx + num_words]
else:
# Select random words
selected_words = rng.sample(self.words, num_words)
# Scramble each word
scrambled_words = [self._scramble_word(word, corruption_level, rng) for word in selected_words]
return {
"question": f"Unscramble these words: {' '.join(scrambled_words)}",
"answer": " ".join(selected_words),
"metadata": {
"num_words": num_words,
"corruption_level": corruption_level,
"scrambled_words": scrambled_words,
"original_words": selected_words,
},
The metadata structure from the template system:
{
"scrambled": {
"scrambled_words": str, # Space-separated scrambled words
"original_words": List[str] # List of original words
}
}
Args:
metadata: The metadata containing the expression information.
register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
Returns:
A dictionary containing:
- scrambled_words: List[str] of scrambled words
- original_words: List[str] of original words
"""
# Extract the scrambled and original words from metadata
template_data = metadata["scrambled"]
scrambled_words = template_data["scrambled_words"].split()
original_words = template_data["original_words"]
return {
"scrambled_words": scrambled_words,
"original_words": original_words
}
def _evaluate_expression(self, parsed_data: Dict[str, Any]) -> str:
"""Evaluate the expression using the parsed data.
Args:
parsed_data: Dictionary containing:
- scrambled_words: List[str] of scrambled words
- original_words: List[str] of original words
Returns:
The answer string (space-separated original words).
"""
return " ".join(parsed_data["original_words"])

View File

@@ -1,8 +1,10 @@
 from .base_conversion_curriculum import BaseConversionCurriculum
 from .caesar_cipher_curriculum import CaesarCipherCurriculum
 from .letter_counting_curriculum import LetterCountingCurriculum
+from .letter_jumble_curriculum import LetterJumbleCurriculum
 __all__ = [
     "BaseConversionCurriculum",
     "CaesarCipherCurriculum",
-    "LetterCountingCurriculum"
+    "LetterCountingCurriculum",
+    "LetterJumbleCurriculum"
 ]

View File

@@ -0,0 +1,122 @@
"""
Curriculum definition for letter jumble exercises.
"""
from typing import Dict, Any
from reasoning_gym.core.base_curriculum import BaseCurriculum
from reasoning_gym.core.attributes import AttributeDefinition, AttributeType
from reasoning_gym.core.template import Template
from reasoning_gym.data import read_data_file
class LetterJumbleCurriculum(BaseCurriculum):
def __init__(self):
super().__init__("LetterJumbleCurriculum")
import re
self.words = [word for word in re.findall(r"\b\w+\b", read_data_file("in_the_year_2889.txt")) if word.isalpha()]
def _init_curriculum(self) -> None:
"""Initialize the letter jumble curriculum configuration"""
# Define valid attribute types
self._valid_types = {
AttributeType.STATIC, # For boolean flags
AttributeType.UBOUND, # For ranges like word length, num words
AttributeType.APPEND # For accumulating options
}
# Define attributes
self._attributes = {
"word_length": AttributeDefinition(
levels=[7, 12, 64], # From min_word_len/max_word_len
default_level=0,
description="Maximum word length",
attr_type=AttributeType.UBOUND,
min_value=1 # Ensure at least 2 chars for scrambling
),
"preserve_length": AttributeDefinition(
levels=[4, 2],
default_level=0,
description="Word length to preserve",
attr_type=AttributeType.STATIC
),
"num_words": AttributeDefinition(
levels=[3, 5, 20], # From min_words/max_words
default_level=0,
description="Number of words to scramble",
attr_type=AttributeType.UBOUND,
min_value=1 # Ensure at least 1 word
),
"corruption_level": AttributeDefinition(
levels=[0.1, 0.3, 0.9], # From min/max_corruption_level
default_level=0,
description="Fraction of characters to swap",
attr_type=AttributeType.UBOUND,
min_value=0.1
),
"consecutive_words": AttributeDefinition(
levels=[True, False],
default_level=0,
description="Whether to select consecutive words",
attr_type=AttributeType.APPEND
)
}
# Define templates with symbolic placeholders
self._templates = [
Template(
template="Unscramble these words: \"{scrambled}\"",
parts={"scrambled": "word_list"}
),
Template(
template="What are the original words? \"{scrambled}\"",
parts={"scrambled": "word_list"}
),
Template(
template="Rearrange the letters to find the original words: \"{scrambled}\"",
parts={"scrambled": "word_list"}
)
]
# Define symbolic structure
self._symbolic = {
# Shared variables that need to be consistent across templates
"shared_vars": {
# Selected original words that will be scrambled
"selected_words": lambda refs: (
n_words := refs["num_words"](),
pool := self.words,
refs["dataset_rng"].sample(pool, n_words) if not refs["consecutive_words"]() else
(
start := refs["dataset_rng"].randint(0, max(0, len(pool)-n_words)),
pool[start:start + n_words]
)[-1]
)[-1]
},
# Value generators for dynamic content
"generators": {
# Scramble a single word based on corruption level
"scramble_word": lambda refs: lambda lst: (
[
(i, j, lst.__setitem__(i, lst[j]), lst.__setitem__(j, temp)) # Debugging: keep track of indices and assignments
for _ in range(max(0, int(len(lst) * refs["corruption_level"]())))
for i, j in [refs["dataset_rng"].sample(range(len(lst)), 2)]
for temp in [lst[i]] # Introduce temp variable for correct swap
],
"".join(lst)
)[-1],
# Generate scrambled version of all selected words
"scramble_all": lambda refs: lambda: [
refs["scramble_word"](refs)(list(word)) if len(word) > refs["preserve_length"]() else word
for word in refs["selected_words"](refs)
]
},
# Template composition
"templates": {
"word_list": lambda refs: {
"template": "{scrambled_words}",
"parts": {
"scrambled_words": lambda refs=refs: " ".join(refs["scramble_all"](refs)()),
"original_words": lambda refs=refs: refs["selected_words"](refs)
}
}
}
}
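
Note: the `scramble_word` generator above folds its swap loop into a list comprehension (the `temp` binding makes each swap use the pre-swap character). A plain-function sketch of the same swapping logic, for readability only and not part of the commit:

    import random

    def scramble_word(word: str, corruption_level: float, rng: random.Random) -> str:
        """Swap int(len(word) * corruption_level) random pairs of characters."""
        chars = list(word)
        if len(chars) < 2:
            # Nothing to swap; the curriculum's scramble_all only scrambles words
            # longer than preserve_length anyway.
            return word
        for _ in range(max(0, int(len(chars) * corruption_level))):
            i, j = rng.sample(range(len(chars)), 2)  # two distinct positions
            chars[i], chars[j] = chars[j], chars[i]  # swap, mirroring the paired __setitem__ calls
        return "".join(chars)

As in `scramble_all`, words no longer than `preserve_length` are returned unscrambled.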

View File

@@ -9,7 +9,7 @@ Algorithmic tasks for training reasoning capabilities:
 from .base_conversion import BaseConversionExercise
 from .caesar_cipher import CaesarCipherExercise
 from .letter_counting import LetterCountingExercise
-# from .letter_jumble import LetterJumbleExercise
+from .letter_jumble import LetterJumbleExercise
 # from .number_filtering import NumberFilteringExercise
 # from .number_sorting import NumberSortingExercise
 # from .sentence_reordering import SentenceReorderingExercise
@@ -23,7 +23,7 @@ __all__ = [
     "BaseConversionExercise",
     "CaesarCipherExercise",
     "LetterCountingExercise",
-    # "LetterJumbleDataset",
+    "LetterJumbleExercise",
     # "NumberFilteringDataset",
     # "NumberSortingDataset",
     # "SentenceReorderingDataset",

View File

@@ -1,121 +1,289 @@
"""Tests for letter jumbling task generation"""
"""Unit tests for the letter jumble exercise."""
from random import Random
from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise
import unittest
import random
from collections import defaultdict
import pytest
class TestLetterJumbleParsing(unittest.TestCase):
"""Test parsing of letter jumble metadata"""
from reasoning_gym.algorithmic.letter_jumble import LetterJumbleConfig, LetterJumbleDataset
def setUp(self):
self.exercise = LetterJumbleExercise()
def test_parse_expression_basic(self):
"""Test parsing of basic letter jumble metadata"""
test_metadata = {
"scrambled": {
"scrambled_words": "EHLLO DLWOR",
"original_words": ["HELLO", "WORLD"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR"])
self.assertEqual(parsed["original_words"], ["HELLO", "WORLD"])
def test_letter_jumble_config_validation():
"""Test that invalid configs raise appropriate errors"""
with pytest.raises(AssertionError):
config = LetterJumbleConfig(min_word_len=0)
config.validate()
def test_parse_with_spaces(self):
"""Test parsing with spaces and punctuation"""
test_metadata = {
"scrambled": {
"scrambled_words": "EHLLO DLWOR!",
"original_words": ["HELLO", "WORLD!"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR!"])
self.assertEqual(parsed["original_words"], ["HELLO", "WORLD!"])
with pytest.raises(AssertionError):
config = LetterJumbleConfig(min_words=10, max_words=5)
config.validate()
def test_parse_mixed_case(self):
"""Test parsing with mixed case text"""
test_metadata = {
"scrambled": {
"scrambled_words": "HeLlO WoRlD",
"original_words": ["hElLo", "wOrLd"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["HeLlO", "WoRlD"])
self.assertEqual(parsed["original_words"], ["hElLo", "wOrLd"])
with pytest.raises(AssertionError):
config = LetterJumbleConfig(min_corruption_level=-0.1)
config.validate()
class TestLetterJumbleEvaluation(unittest.TestCase):
"""Test evaluation of letter jumble problems"""
with pytest.raises(AssertionError):
config = LetterJumbleConfig(max_corruption_level=1.1)
config.validate()
def setUp(self):
self.exercise = LetterJumbleExercise()
def test_basic_unscrambling(self):
"""Test basic unscrambling cases"""
test_cases = [
(["EHLLO"], "HELLO"), # Single word
(["EHLLO", "DLWOR"], "HELLO WORLD"), # Two words
(["AAAA"], "AAAA"), # Same letters
(["ZBAC"], "ABCZ"), # Sorted order
(["HELLO"], "HELLO") # Already unscrambled
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
def test_letter_jumble_deterministic():
"""Test that dataset generates same items with same seed"""
config = LetterJumbleConfig(seed=42, size=10)
dataset1 = LetterJumbleDataset(config)
dataset2 = LetterJumbleDataset(config)
def test_mixed_case_unscrambling(self):
"""Test unscrambling with mixed case"""
test_cases = [
(["HeLlO"], "hElLo"), # Mixed case, single word
(["WoRlD", "HeLlO"], "wOrLd hElLo"), # Mixed case, multiple words
(["AbCdE"], "aBcDe") # Mixed case, alternating
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
for i in range(len(dataset1)):
assert dataset1[i] == dataset2[i]
def test_with_spaces_and_punctuation(self):
"""Test unscrambling with spaces and punctuation"""
test_cases = [
(["EHLLO!", "DLWOR?"], "HELLO! WORLD?"),
(["EHLLO.", "DLWOR."], "HELLO. WORLD."),
(["EHLLO,", "DLWOR,"], "HELLO, WORLD,")
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
class TestLetterJumbleGeneration(unittest.TestCase):
"""Test problem generation"""
def test_letter_jumble_scrambling():
"""Test the word scrambling logic"""
config = LetterJumbleConfig(
min_word_len=4,
max_word_len=8,
min_words=1,
max_words=1,
min_corruption_level=0.5,
max_corruption_level=0.5,
size=1,
seed=42,
)
dataset = LetterJumbleDataset(config)
def setUp(self):
self.curriculum = LetterJumbleCurriculum()
self.exercise = LetterJumbleExercise()
self.rng = random.Random(42)
self.curriculum.rng = self.rng
# Test with known word
word = "testing"
rng = Random(42)
scrambled = dataset._scramble_word(word, 0.5, rng)
def test_problem_structure(self):
"""Test that generated problems have the correct structure"""
problem = self.exercise.generate(self.curriculum)
# Verify scrambled word:
# - Has same length as original
assert len(scrambled) == len(word)
# - Contains same characters
assert sorted(scrambled) == sorted(word)
# - Is different from original (with high probability given 0.5 corruption)
assert scrambled != word
# Check basic structure
self.assertIn("question", problem)
self.assertIn("answer", problem)
self.assertIn("metadata", problem)
# Check metadata structure
metadata = problem["metadata"]
self.assertEqual(metadata["type"], "direct")
self.assertIn("executed_parts", metadata)
executed_parts = metadata["executed_parts"]
self.assertIn("scrambled_words", executed_parts)
self.assertIn("original_words", executed_parts)
def test_letter_jumble_dataset_items():
"""Test basic properties of generated items"""
config = LetterJumbleConfig(
min_word_len=4,
max_word_len=8,
min_words=3,
max_words=5,
min_corruption_level=0.1,
max_corruption_level=0.3,
size=50,
seed=42,
)
dataset = LetterJumbleDataset(config)
def test_word_length_ranges(self):
"""Test that word lengths are within expected ranges"""
# Test all word length levels
level_max_lengths = {0: 5, 1: 8, 2: 64}
for i in range(len(dataset)):
item = dataset[i]
for level, max_length in level_max_lengths.items():
self.curriculum.set_attr_level("word_length", level)
problem = self.exercise.generate(self.curriculum)
words = problem["metadata"]["executed_parts"]["original_words"]
for word in words:
self.assertLessEqual(len(word), max_length)
self.assertGreaterEqual(len(word), 2) # Min length is 2
# Check item structure
assert isinstance(item, dict)
assert "question" in item
assert "answer" in item
assert "metadata" in item
def test_word_count_ranges(self):
"""Test that word counts are within expected ranges"""
# Test all word count levels
level_word_counts = {0: 3, 1: 5, 2: 20}
# Check metadata
metadata = item["metadata"]
assert "num_words" in metadata
assert "corruption_level" in metadata
assert "scrambled_words" in metadata
assert "original_words" in metadata
for level, max_words in level_word_counts.items():
self.curriculum.set_attr_level("num_words", level)
problem = self.exercise.generate(self.curriculum)
words = problem["metadata"]["executed_parts"]["original_words"]
self.assertLessEqual(len(words), max_words)
self.assertGreaterEqual(len(words), 1) # Min words is 1
# Verify word counts
num_words = metadata["num_words"]
assert config.min_words <= num_words <= config.max_words
assert len(metadata["scrambled_words"]) == num_words
assert len(metadata["original_words"]) == num_words
class TestLetterJumbleComprehensive(unittest.TestCase):
"""Comprehensive tests for letter jumble"""
# Verify corruption level
assert config.min_corruption_level <= metadata["corruption_level"] <= config.max_corruption_level
def setUp(self):
self.curriculum = LetterJumbleCurriculum()
self.exercise = LetterJumbleExercise()
self.rng = random.Random(42)
self.curriculum.rng = self.rng
# Verify word properties
for word in metadata["original_words"]:
assert config.min_word_len <= len(word) <= config.max_word_len
assert word.isalpha()
def test_corruption_levels(self):
"""Test different corruption levels"""
corruption_levels = [0.1, 0.3, 0.9]
num_samples = 100 # Test with multiple samples
# Test each level
for level, expected_corruption in enumerate(corruption_levels):
self.curriculum.set_attr_level("corruption_level", level)
differences = []
def test_letter_jumble_iteration():
"""Test that iteration respects dataset size"""
config = LetterJumbleConfig(size=5, seed=42)
dataset = LetterJumbleDataset(config)
# Generate multiple problems to measure average corruption
for _ in range(num_samples):
problem = self.exercise.generate(self.curriculum)
metadata = problem["metadata"]["executed_parts"]
# Calculate character differences
preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")]
for orig, scrambled in zip(metadata["original_words"], metadata["scrambled_words"]):
if len(orig) > preserve_len:
diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b)
differences.append(diff_count / len(orig))
items = list(dataset)
assert len(items) == config.size
# Check average corruption level is reasonable
# It's okay if actual corruption is lower than target due to:
# 1. Some swaps might cancel out previous swaps
# 2. The same characters might be swapped multiple times
# 3. The preserve_length attribute prevents some characters from being swapped
# 4. For short words, even a few swaps can make them readable
if differences:
avg_corruption = sum(differences) / len(differences)
# Only check that we don't exceed target by too much
self.assertLess(avg_corruption, expected_corruption + 0.1,
f"Corruption level {avg_corruption:.2f} too high (target: {expected_corruption:.2f})")
# And ensure we have some corruption
self.assertGreater(avg_corruption, 0.02,
f"Corruption level {avg_corruption:.2f} too low (should be above 0.02)")
# Test multiple iterations yield same items
assert items == list(dataset)
def test_template_variation(self):
"""Test that different templates are used"""
templates_seen = set()
num_samples = 100
for _ in range(num_samples):
problem = self.exercise.generate(self.curriculum)
templates_seen.add(problem["question"].split(":")[0])
self.assertGreater(len(templates_seen), 1, "Not enough template variation")
def test_comprehensive_random_evaluation(self):
"""Test random evaluation with various configurations and track statistics."""
self.rng = random.Random(42) # Fixed seed for reproducibility
self.curriculum.rng = self.rng
# Track statistics
word_lengths = defaultdict(int)
word_counts = defaultdict(int)
corruption_levels = defaultdict(list)
consecutive_words_count = 0
total_samples = 1000
# Generate test cases
for _ in range(total_samples):
# Set random attribute levels
for attr in self.curriculum.attributes:
max_level = len(self.curriculum.attributes[attr].levels) - 1
self.curriculum.set_attr_level(attr, self.rng.randint(0, max_level))
# Generate and evaluate a random problem
problem = self.exercise.generate(self.curriculum)
metadata = problem["metadata"]["executed_parts"]
original_words = metadata["original_words"]
scrambled_words = metadata["scrambled_words"]
# Track statistics
word_counts[len(original_words)] += 1
for word in original_words:
word_lengths[len(word)] += 1
# Calculate corruption levels
for orig, scrambled in zip(original_words, scrambled_words):
preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")]
if len(orig) > preserve_len:
diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b)
corruption_levels[len(orig)].append(diff_count / len(orig))
# Check if words are consecutive in source text
if len(original_words) > 1:
text = " ".join(self.curriculum.words)
phrase = " ".join(original_words)
if phrase in text:
consecutive_words_count += 1
# Verify scrambling is valid
for orig, scrambled in zip(original_words, scrambled_words):
# Check lengths match
self.assertEqual(len(orig), len(scrambled))
# Check same letters are used
self.assertEqual(sorted(orig), sorted(scrambled))
# Print statistics
print("\nWord length distribution:")
for length, count in sorted(word_lengths.items()):
print(f" Length {length}: {count}")
print("\nWord count distribution:")
for count, freq in sorted(word_counts.items()):
print(f" {count} words: {freq}")
print("\nAverage corruption levels by word length:")
for length, levels in sorted(corruption_levels.items()):
avg = sum(levels) / len(levels) if levels else 0
print(f" Length {length}: {avg:.2f}")
print(f"\nConsecutive words: {consecutive_words_count}/{total_samples}")
# Verify statistical properties
self.assertTrue(any(length >= 8 for length in word_lengths),
"No long words generated")
self.assertTrue(any(count >= 3 for count in word_counts.values()),
"Not enough variation in word counts")
self.assertTrue(consecutive_words_count > 0,
"No consecutive words generated")
self.assertTrue(consecutive_words_count < total_samples,
"Too many consecutive words")
if __name__ == '__main__':
unittest.main()
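
Note on the corruption measurements in `test_corruption_levels` and `test_comprehensive_random_evaluation`: corruption is measured as the fraction of positions where the scrambled word differs from the original. Each swap changes at most two positions, repeated swaps can cancel or hit the same positions, and preserved words are skipped, so the measured fraction need not equal the configured swap fraction. A small worked example of the measurement itself:

    orig, scrambled = "HELLO", "EHLLO"  # result of a single swap of positions 0 and 1
    diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b)  # 2 positions differ
    measured = diff_count / len(orig)  # 0.4 from just one swap of a 5-letter word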