mirror of
				https://github.com/open-thought/reasoning-gym.git
				synced 2025-10-09 13:40:09 +03:00 
			
		
		
		
	Refactor LetterJumble
This commit is contained in:
		
							
								
								
									
										105
									
								
								examples/exercises/algorithmic/letter_jumble_examples.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								examples/exercises/algorithmic/letter_jumble_examples.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,105 @@ | ||||
| """Examples of generated problems from the LetterJumble exercise. | ||||
|  | ||||
| This file demonstrates different types of letter jumble problems that can be generated | ||||
| at various difficulty levels. | ||||
| """ | ||||
|  | ||||
| import random | ||||
| from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum | ||||
| from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise | ||||
|  | ||||
| def main(): | ||||
|     # Initialize with fixed seed for reproducibility | ||||
|     curriculum = LetterJumbleCurriculum() | ||||
|     exercise = LetterJumbleExercise() | ||||
|     curriculum.rng = random.Random(42) | ||||
|  | ||||
|     print("\n========================================\n") | ||||
|  | ||||
|     # Level 0: Basic word scrambling | ||||
|     curriculum.set_attr_level("word_length", 0)  # Short words (up to 7 chars) | ||||
|     curriculum.set_attr_level("num_words", 0)  # Few words (up to 3) | ||||
|     curriculum.set_attr_level("corruption_level", 0)  # Light scrambling (up to 0.1) | ||||
|     curriculum.set_attr_level("consecutive_words", 0)  # Consecutive words | ||||
|     curriculum.set_attr_level("preserve_length", 0)  # Words of 4 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("Level 0 (Basic Word Scrambling):") | ||||
|     print(problem) | ||||
|  | ||||
|     print("\n========================================\n") | ||||
|  | ||||
|     # Level 1: Medium difficulty | ||||
|     curriculum.set_attr_level("word_length", 1)  # Medium words (up to 12 chars) | ||||
|     curriculum.set_attr_level("num_words", 1)  # More words (up to 5) | ||||
|     curriculum.set_attr_level("corruption_level", 1)  # Medium scrambling (up to 0.3) | ||||
|     curriculum.set_attr_level("consecutive_words", 0)  # Consecutive words | ||||
|     curriculum.set_attr_level("preserve_length", 0)  # Words of 4 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("Level 1 (Medium Difficulty):") | ||||
|     print(problem) | ||||
|  | ||||
|     print("\n========================================\n") | ||||
|  | ||||
|     # Level 2: Advanced scrambling | ||||
|     curriculum.set_attr_level("word_length", 2)  # Long words (up to 64 chars) | ||||
|     curriculum.set_attr_level("num_words", 2)  # Many words (up to 20) | ||||
|     curriculum.set_attr_level("corruption_level", 2)  # Heavy scrambling (0.9) | ||||
|     curriculum.set_attr_level("consecutive_words", 1)  # Non-consecutive words | ||||
|     curriculum.set_attr_level("preserve_length", 1)  # Words of 2 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("Level 2 (Advanced Scrambling):") | ||||
|     print(problem) | ||||
|  | ||||
|     print("\n========================================\n") | ||||
|  | ||||
|     # Random Examples with Different Seeds | ||||
|     print("Random Examples (Different Seeds):") | ||||
|     for seed in range(10, 15): | ||||
|         curriculum.rng = random.Random(seed) | ||||
|         # Randomly set curriculum levels | ||||
|         curriculum.set_attr_level("word_length", random.randint(0, 2)) | ||||
|         curriculum.set_attr_level("num_words", random.randint(0, 2)) | ||||
|         curriculum.set_attr_level("corruption_level", random.randint(0, 2)) | ||||
|         curriculum.set_attr_level("consecutive_words", random.randint(0, 1)) | ||||
|         curriculum.set_attr_level("preserve_length", random.randint(0, 1)) | ||||
|         problem = exercise.generate(curriculum) | ||||
|         print(f"\nRandom Example (Seed {seed}):") | ||||
|         print(problem) | ||||
|  | ||||
|     print("\n========================================\n") | ||||
|  | ||||
|     # Special Cases | ||||
|     print("Special Cases:") | ||||
|  | ||||
|     # Case 1: Maximum length single word with minimal preservation | ||||
|     curriculum.set_attr_level("word_length", 2)  # Long words | ||||
|     curriculum.set_attr_level("num_words", 0)  # Fewest words (level 0: up to 3, not necessarily one) | ||||
|     curriculum.set_attr_level("corruption_level", 2)  # Heavy scrambling | ||||
|     curriculum.set_attr_level("consecutive_words", 0)  # Consecutive words | ||||
|     curriculum.set_attr_level("preserve_length", 1)  # Words of 2 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("\nLong Single Word (Minimal Preservation):") | ||||
|     print(problem) | ||||
|  | ||||
|     # Case 2: Many short words with maximum preservation | ||||
|     curriculum.set_attr_level("word_length", 0)  # Short words | ||||
|     curriculum.set_attr_level("num_words", 2)  # Many words | ||||
|     curriculum.set_attr_level("corruption_level", 1)  # Medium scrambling | ||||
|     curriculum.set_attr_level("consecutive_words", 1)  # Non-consecutive | ||||
|     curriculum.set_attr_level("preserve_length", 0)  # Words of 4 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("\nMany Short Words (Maximum Preservation):") | ||||
|     print(problem) | ||||
|  | ||||
|     # Case 3: Medium words with balanced preservation | ||||
|     curriculum.set_attr_level("word_length", 1)  # Medium words | ||||
|     curriculum.set_attr_level("num_words", 1)  # Medium number of words | ||||
|     curriculum.set_attr_level("corruption_level", 0)  # Light scrambling | ||||
|     curriculum.set_attr_level("consecutive_words", 0)  # Consecutive | ||||
|     curriculum.set_attr_level("preserve_length", 1)  # Words of 2 chars or fewer stay unscrambled | ||||
|     problem = exercise.generate(curriculum) | ||||
|     print("\nMedium Words (Balanced Preservation):") | ||||
|     print(problem) | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     main()  | ||||
| @@ -1,103 +1,66 @@ | ||||
| """Word letter jumbling task generator""" | ||||
| """Exercise definition for letter jumble exercises.""" | ||||
|  | ||||
| import re | ||||
| from dataclasses import dataclass | ||||
| from random import Random | ||||
| from typing import List, Optional | ||||
| from typing import Dict, Any | ||||
| from reasoning_gym.core.template import Template | ||||
|  | ||||
| from reasoning_gym.data import read_data_file | ||||
| class LetterJumbleExercise: | ||||
|     """Exercise generator for word jumbling tasks.""" | ||||
|  | ||||
| from ..factory import ProceduralDataset, register_dataset | ||||
|     def __init__(self): | ||||
|         self.curriculum = None | ||||
|  | ||||
|     def generate(self, curriculum: Any) -> Dict[str, Any]: | ||||
|         """ | ||||
|         Generate a word jumbling problem using the curriculum. | ||||
|  | ||||
| @dataclass | ||||
| class LetterJumbleConfig: | ||||
|     """Configuration for letter jumbling task generation""" | ||||
|         Returns: | ||||
|             Dict containing: | ||||
|                 - question: str (e.g. "Unscramble these words: OLHEL DLWOR") | ||||
|                 - answer: str (the original words) | ||||
|                 - metadata: dict with details (scrambled_words, original_words, etc.) | ||||
|         """ | ||||
|         self.curriculum = curriculum | ||||
|         template = curriculum.get_template(curriculum.rng) | ||||
|         return template.eval(self, curriculum.rng) | ||||
|  | ||||
|     min_word_len: int = 1  # Minimum word length | ||||
|     max_word_len: int = 64  # Maximum word length | ||||
|     min_words: int = 3  # Minimum words per task | ||||
|     max_words: int = 20  # Maximum words per task | ||||
|     min_corruption_level: float = 0.1  # Minimum fraction of characters to swap | ||||
|     max_corruption_level: float = 0.9  # Maximum fraction of characters to swap | ||||
|     consecutive_words: bool = True  # Whether to select consecutive words from text | ||||
|     seed: Optional[int] = None | ||||
|     size: int = 500  # Virtual dataset size | ||||
|     def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]: | ||||
|         """Parse the expression from the metadata. | ||||
|  | ||||
|     def validate(self) -> None: | ||||
|         """Validate configuration parameters""" | ||||
|         assert self.min_word_len > 0, "min_word_len must be positive" | ||||
|         assert self.max_word_len >= self.min_word_len, "max_word_len must be >= min_word_len" | ||||
|         assert self.min_words > 0, "min_words must be positive" | ||||
|         assert self.max_words >= self.min_words, "max_words must be >= min_words" | ||||
|         assert 0 <= self.min_corruption_level <= 1, "min_corruption_level must be in [0,1]" | ||||
|         assert 0 <= self.max_corruption_level <= 1, "max_corruption_level must be in [0,1]" | ||||
|         assert ( | ||||
|             self.max_corruption_level >= self.min_corruption_level | ||||
|         ), "max_corruption_level must be >= min_corruption_level" | ||||
|  | ||||
|  | ||||
| class LetterJumbleDataset(ProceduralDataset): | ||||
|     """Generates word letter jumbling tasks""" | ||||
|  | ||||
|     def __init__(self, config: LetterJumbleConfig): | ||||
|         super().__init__(config=config, seed=config.seed, size=config.size) | ||||
|  | ||||
|         # Load and preprocess text | ||||
|         text = read_data_file("in_the_year_2889.txt") | ||||
|         # Extract words and filter by length | ||||
|         self.words = [ | ||||
|             word | ||||
|             for word in re.findall(r"\b\w+\b", text) | ||||
|             if self.config.min_word_len <= len(word) <= self.config.max_word_len and word.isalpha() | ||||
|         ] | ||||
|  | ||||
|     def _scramble_word(self, word: str, corruption_level: float, rng: Random) -> str: | ||||
|         """Scramble a word by swapping random pairs of characters""" | ||||
|         if len(word) < 2:  # Can't scramble 1-character words | ||||
|             return word | ||||
|  | ||||
|         word = list(word) | ||||
|         num_swaps = max(1, int(len(word) * corruption_level))  # Ensure at least one swap | ||||
|  | ||||
|         for _ in range(num_swaps): | ||||
|             # Pick two different random positions | ||||
|             pos1, pos2 = rng.sample(range(len(word)), 2) | ||||
|             # Swap characters | ||||
|             word[pos1], word[pos2] = word[pos2], word[pos1] | ||||
|  | ||||
|         return "".join(word) | ||||
|  | ||||
|     def __getitem__(self, idx: int) -> dict: | ||||
|         """Generate a single word jumbling task""" | ||||
|         rng = Random(self.seed + idx) | ||||
|  | ||||
|         # Select number of words and corruption level | ||||
|         num_words = rng.randint(self.config.min_words, self.config.max_words) | ||||
|         corruption_level = rng.uniform(self.config.min_corruption_level, self.config.max_corruption_level) | ||||
|  | ||||
|         # Select words based on configuration | ||||
|         if self.config.consecutive_words: | ||||
|             # Select consecutive words from a random starting position | ||||
|             start_idx = rng.randint(0, len(self.words) - num_words) | ||||
|             selected_words = self.words[start_idx : start_idx + num_words] | ||||
|         else: | ||||
|             # Select random words | ||||
|             selected_words = rng.sample(self.words, num_words) | ||||
|  | ||||
|         # Scramble each word | ||||
|         scrambled_words = [self._scramble_word(word, corruption_level, rng) for word in selected_words] | ||||
|  | ||||
|         return { | ||||
|             "question": f"Unscramble these words: {' '.join(scrambled_words)}", | ||||
|             "answer": " ".join(selected_words), | ||||
|             "metadata": { | ||||
|                 "num_words": num_words, | ||||
|                 "corruption_level": corruption_level, | ||||
|                 "scrambled_words": scrambled_words, | ||||
|                 "original_words": selected_words, | ||||
|             }, | ||||
|         The metadata structure from the template system: | ||||
|         { | ||||
|             "scrambled": { | ||||
|                 "scrambled_words": str,  # Space-separated scrambled words | ||||
|                 "original_words": List[str]  # List of original words | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Args: | ||||
|             metadata: The metadata containing the expression information. | ||||
|  | ||||
| register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig) | ||||
|         Returns: | ||||
|             A dictionary containing: | ||||
|                 - scrambled_words: List[str] of scrambled words | ||||
|                 - original_words: List[str] of original words | ||||
|         """ | ||||
|         # Extract the scrambled and original words from metadata | ||||
|         template_data = metadata["scrambled"] | ||||
|         scrambled_words = template_data["scrambled_words"].split() | ||||
|         original_words = template_data["original_words"] | ||||
|  | ||||
|         return { | ||||
|             "scrambled_words": scrambled_words, | ||||
|             "original_words": original_words | ||||
|         } | ||||
|  | ||||
|     def _evaluate_expression(self, parsed_data: Dict[str, Any]) -> str: | ||||
|         """Evaluate the expression using the parsed data. | ||||
|  | ||||
|         Args: | ||||
|             parsed_data: Dictionary containing: | ||||
|                 - scrambled_words: List[str] of scrambled words | ||||
|                 - original_words: List[str] of original words | ||||
|  | ||||
|         Returns: | ||||
|             The answer string (space-separated original words). | ||||
|         """ | ||||
|         return " ".join(parsed_data["original_words"]) | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| from .base_conversion_curriculum import BaseConversionCurriculum | ||||
| from .caesar_cipher_curriculum import CaesarCipherCurriculum | ||||
| from .letter_counting_curriculum import LetterCountingCurriculum | ||||
| from .letter_jumble_curriculum import LetterJumbleCurriculum | ||||
| __all__ = [ | ||||
|     "BaseConversionCurriculum", | ||||
| 	"CaesarCipherCurriculum", | ||||
| 	"LetterCountingCurriculum" | ||||
| 	"LetterCountingCurriculum", | ||||
| 	"LetterJumbleCurriculum" | ||||
| ] | ||||
|   | ||||
							
								
								
									
										122
									
								
								reasoning_gym/curricula/algorithmic/letter_jumble_curriculum.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								reasoning_gym/curricula/algorithmic/letter_jumble_curriculum.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| """ | ||||
| Curriculum definition for letter jumble exercises. | ||||
| """ | ||||
|  | ||||
| from typing import Dict, Any | ||||
| from reasoning_gym.core.base_curriculum import BaseCurriculum | ||||
| from reasoning_gym.core.attributes import AttributeDefinition, AttributeType | ||||
| from reasoning_gym.core.template import Template | ||||
| from reasoning_gym.data import read_data_file | ||||
|  | ||||
| class LetterJumbleCurriculum(BaseCurriculum): | ||||
|     def __init__(self): | ||||
|         super().__init__("LetterJumbleCurriculum") | ||||
|         import re | ||||
|         self.words = [word for word in re.findall(r"\b\w+\b", read_data_file("in_the_year_2889.txt")) if word.isalpha()] | ||||
|  | ||||
|     def _init_curriculum(self) -> None: | ||||
|         """Initialize the letter jumble curriculum configuration""" | ||||
|         # Define valid attribute types | ||||
|         self._valid_types = { | ||||
|             AttributeType.STATIC,   # For boolean flags | ||||
|             AttributeType.UBOUND,   # For ranges like word length, num words | ||||
|             AttributeType.APPEND    # For accumulating options | ||||
|         } | ||||
|  | ||||
|         # Define attributes | ||||
|         self._attributes = { | ||||
|             "word_length": AttributeDefinition( | ||||
|                 levels=[7, 12, 64],  # From min_word_len/max_word_len | ||||
|                 default_level=0, | ||||
|                 description="Maximum word length", | ||||
|                 attr_type=AttributeType.UBOUND, | ||||
|                 min_value=1  # Lower bound is 1 char; words this short are never scrambled (see preserve_length) | ||||
|             ), | ||||
|             "preserve_length": AttributeDefinition( | ||||
|                 levels=[4, 2], | ||||
|                 default_level=0, | ||||
|                 description="Word length to preserve", | ||||
|                 attr_type=AttributeType.STATIC | ||||
|             ), | ||||
|             "num_words": AttributeDefinition( | ||||
|                 levels=[3, 5, 20],  # From min_words/max_words | ||||
|                 default_level=0, | ||||
|                 description="Number of words to scramble", | ||||
|                 attr_type=AttributeType.UBOUND, | ||||
|                 min_value=1  # Ensure at least 1 word | ||||
|             ), | ||||
|             "corruption_level": AttributeDefinition( | ||||
|                 levels=[0.1, 0.3, 0.9],  # From min/max_corruption_level | ||||
|                 default_level=0, | ||||
|                 description="Fraction of characters to swap", | ||||
|                 attr_type=AttributeType.UBOUND, | ||||
|                 min_value=0.1 | ||||
|             ), | ||||
|             "consecutive_words": AttributeDefinition( | ||||
|                 levels=[True, False], | ||||
|                 default_level=0, | ||||
|                 description="Whether to select consecutive words", | ||||
|                 attr_type=AttributeType.APPEND | ||||
|             ) | ||||
|         } | ||||
|  | ||||
|         # Define templates with symbolic placeholders | ||||
|         self._templates = [ | ||||
|             Template( | ||||
|                 template="Unscramble these words: \"{scrambled}\"", | ||||
|                 parts={"scrambled": "word_list"} | ||||
|             ), | ||||
|             Template( | ||||
|                 template="What are the original words? \"{scrambled}\"", | ||||
|                 parts={"scrambled": "word_list"} | ||||
|             ), | ||||
|             Template( | ||||
|                 template="Rearrange the letters to find the original words: \"{scrambled}\"", | ||||
|                 parts={"scrambled": "word_list"} | ||||
|             ) | ||||
|         ] | ||||
|  | ||||
|         # Define symbolic structure | ||||
|         self._symbolic = { | ||||
|             # Shared variables that need to be consistent across templates | ||||
|             "shared_vars": { | ||||
|                 # Selected original words that will be scrambled | ||||
|                 "selected_words": lambda refs: ( | ||||
|                     n_words := refs["num_words"](), | ||||
|                     pool := self.words, | ||||
|                     refs["dataset_rng"].sample(pool, n_words) if not refs["consecutive_words"]() else | ||||
| 						( | ||||
|                             start := refs["dataset_rng"].randint(0, max(0, len(pool)-n_words)), | ||||
|                             pool[start:start + n_words] | ||||
|                         )[-1] | ||||
|                 )[-1] | ||||
|             }, | ||||
|             # Value generators for dynamic content | ||||
|             "generators": { | ||||
|                 # Scramble a single word based on corruption level | ||||
|                 "scramble_word": lambda refs: lambda lst: ( | ||||
|                     [ | ||||
|                         (i, j, lst.__setitem__(i, lst[j]), lst.__setitem__(j, temp)) # Debugging: keep track of indices and assignments | ||||
|                         for _ in range(max(0, int(len(lst) * refs["corruption_level"]()))) | ||||
|                         for i, j in [refs["dataset_rng"].sample(range(len(lst)), 2)] | ||||
|                         for temp in [lst[i]] # Introduce temp variable for correct swap | ||||
|                     ], | ||||
|                     "".join(lst) | ||||
|                 )[-1], | ||||
|                 # Generate scrambled version of all selected words | ||||
|                 "scramble_all": lambda refs: lambda: [ | ||||
|                     refs["scramble_word"](refs)(list(word)) if len(word) > refs["preserve_length"]() else word | ||||
|                     for word in refs["selected_words"](refs) | ||||
|                 ] | ||||
|             }, | ||||
|             # Template composition | ||||
|             "templates": { | ||||
|                 "word_list": lambda refs: { | ||||
|                     "template": "{scrambled_words}", | ||||
|                     "parts": { | ||||
|                         "scrambled_words": lambda refs=refs: " ".join(refs["scramble_all"](refs)()), | ||||
|                         "original_words": lambda refs=refs: refs["selected_words"](refs) | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| @@ -9,7 +9,7 @@ Algorithmic tasks for training reasoning capabilities: | ||||
| from .base_conversion import BaseConversionExercise | ||||
| from .caesar_cipher import CaesarCipherExercise | ||||
| from .letter_counting import LetterCountingExercise | ||||
| # from .letter_jumble import LetterJumbleExercise | ||||
| from .letter_jumble import LetterJumbleExercise | ||||
| # from .number_filtering import NumberFilteringExercise | ||||
| # from .number_sorting import NumberSortingExercise | ||||
| # from .sentence_reordering import SentenceReorderingExercise | ||||
| @@ -23,7 +23,7 @@ __all__ = [ | ||||
|     "BaseConversionExercise", | ||||
|     "CaesarCipherExercise", | ||||
|     "LetterCountingExercise", | ||||
|     # "LetterJumbleDataset", | ||||
|     "LetterJumbleExercise", | ||||
|     # "NumberFilteringDataset", | ||||
|     # "NumberSortingDataset", | ||||
|     # "SentenceReorderingDataset", | ||||
|   | ||||
| @@ -1,121 +1,289 @@ | ||||
| """Tests for letter jumbling task generation""" | ||||
| """Unit tests for the letter jumble exercise.""" | ||||
|  | ||||
| from random import Random | ||||
| from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum | ||||
| from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise | ||||
| import unittest | ||||
| import random | ||||
| from collections import defaultdict | ||||
|  | ||||
| import pytest | ||||
| class TestLetterJumbleParsing(unittest.TestCase): | ||||
|     """Test parsing of letter jumble metadata""" | ||||
|  | ||||
| from reasoning_gym.algorithmic.letter_jumble import LetterJumbleConfig, LetterJumbleDataset | ||||
|     def setUp(self): | ||||
|         self.exercise = LetterJumbleExercise() | ||||
|  | ||||
|     def test_parse_expression_basic(self): | ||||
|         """Test parsing of basic letter jumble metadata""" | ||||
|         test_metadata = { | ||||
|             "scrambled": { | ||||
|                 "scrambled_words": "EHLLO DLWOR", | ||||
|                 "original_words": ["HELLO", "WORLD"] | ||||
|             } | ||||
|         } | ||||
|         parsed = self.exercise._parse_expression(test_metadata) | ||||
|         self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR"]) | ||||
|         self.assertEqual(parsed["original_words"], ["HELLO", "WORLD"]) | ||||
|  | ||||
| def test_letter_jumble_config_validation(): | ||||
|     """Test that invalid configs raise appropriate errors""" | ||||
|     with pytest.raises(AssertionError): | ||||
|         config = LetterJumbleConfig(min_word_len=0) | ||||
|         config.validate() | ||||
|     def test_parse_with_spaces(self): | ||||
|         """Test parsing with spaces and punctuation""" | ||||
|         test_metadata = { | ||||
|             "scrambled": { | ||||
|                 "scrambled_words": "EHLLO DLWOR!", | ||||
|                 "original_words": ["HELLO", "WORLD!"] | ||||
|             } | ||||
|         } | ||||
|         parsed = self.exercise._parse_expression(test_metadata) | ||||
|         self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR!"]) | ||||
|         self.assertEqual(parsed["original_words"], ["HELLO", "WORLD!"]) | ||||
|  | ||||
|     with pytest.raises(AssertionError): | ||||
|         config = LetterJumbleConfig(min_words=10, max_words=5) | ||||
|         config.validate() | ||||
|     def test_parse_mixed_case(self): | ||||
|         """Test parsing with mixed case text""" | ||||
|         test_metadata = { | ||||
|             "scrambled": { | ||||
|                 "scrambled_words": "HeLlO WoRlD", | ||||
|                 "original_words": ["hElLo", "wOrLd"] | ||||
|             } | ||||
|         } | ||||
|         parsed = self.exercise._parse_expression(test_metadata) | ||||
|         self.assertEqual(parsed["scrambled_words"], ["HeLlO", "WoRlD"]) | ||||
|         self.assertEqual(parsed["original_words"], ["hElLo", "wOrLd"]) | ||||
|  | ||||
|     with pytest.raises(AssertionError): | ||||
|         config = LetterJumbleConfig(min_corruption_level=-0.1) | ||||
|         config.validate() | ||||
| class TestLetterJumbleEvaluation(unittest.TestCase): | ||||
|     """Test evaluation of letter jumble problems""" | ||||
|  | ||||
|     with pytest.raises(AssertionError): | ||||
|         config = LetterJumbleConfig(max_corruption_level=1.1) | ||||
|         config.validate() | ||||
|     def setUp(self): | ||||
|         self.exercise = LetterJumbleExercise() | ||||
|  | ||||
|     def test_basic_unscrambling(self): | ||||
|         """Test basic unscrambling cases""" | ||||
|         test_cases = [ | ||||
|             (["EHLLO"], "HELLO"),    # Single word | ||||
|             (["EHLLO", "DLWOR"], "HELLO WORLD"),  # Two words | ||||
|             (["AAAA"], "AAAA"),      # Same letters | ||||
|             (["ZBAC"], "ABCZ"),      # Sorted order | ||||
|             (["HELLO"], "HELLO")      # Already unscrambled | ||||
|         ] | ||||
|         for scrambled, expected in test_cases: | ||||
|             parsed = { | ||||
|                 "scrambled_words": scrambled, | ||||
|                 "original_words": expected.split() | ||||
|             } | ||||
|             result = self.exercise._evaluate_expression(parsed) | ||||
|             self.assertEqual(result, expected) | ||||
|  | ||||
| def test_letter_jumble_deterministic(): | ||||
|     """Test that dataset generates same items with same seed""" | ||||
|     config = LetterJumbleConfig(seed=42, size=10) | ||||
|     dataset1 = LetterJumbleDataset(config) | ||||
|     dataset2 = LetterJumbleDataset(config) | ||||
|     def test_mixed_case_unscrambling(self): | ||||
|         """Test unscrambling with mixed case""" | ||||
|         test_cases = [ | ||||
|             (["HeLlO"], "hElLo"),    # Mixed case, single word | ||||
|             (["WoRlD", "HeLlO"], "wOrLd hElLo"),  # Mixed case, multiple words | ||||
|             (["AbCdE"], "aBcDe")     # Mixed case, alternating | ||||
|         ] | ||||
|         for scrambled, expected in test_cases: | ||||
|             parsed = { | ||||
|                 "scrambled_words": scrambled, | ||||
|                 "original_words": expected.split() | ||||
|             } | ||||
|             result = self.exercise._evaluate_expression(parsed) | ||||
|             self.assertEqual(result, expected) | ||||
|  | ||||
|     for i in range(len(dataset1)): | ||||
|         assert dataset1[i] == dataset2[i] | ||||
|     def test_with_spaces_and_punctuation(self): | ||||
|         """Test unscrambling with spaces and punctuation""" | ||||
|         test_cases = [ | ||||
|             (["EHLLO!", "DLWOR?"], "HELLO! WORLD?"), | ||||
|             (["EHLLO.", "DLWOR."], "HELLO. WORLD."), | ||||
|             (["EHLLO,", "DLWOR,"], "HELLO, WORLD,") | ||||
|         ] | ||||
|         for scrambled, expected in test_cases: | ||||
|             parsed = { | ||||
|                 "scrambled_words": scrambled, | ||||
|                 "original_words": expected.split() | ||||
|             } | ||||
|             result = self.exercise._evaluate_expression(parsed) | ||||
|             self.assertEqual(result, expected) | ||||
|  | ||||
| class TestLetterJumbleGeneration(unittest.TestCase): | ||||
|     """Test problem generation""" | ||||
|  | ||||
| def test_letter_jumble_scrambling(): | ||||
|     """Test the word scrambling logic""" | ||||
|     config = LetterJumbleConfig( | ||||
|         min_word_len=4, | ||||
|         max_word_len=8, | ||||
|         min_words=1, | ||||
|         max_words=1, | ||||
|         min_corruption_level=0.5, | ||||
|         max_corruption_level=0.5, | ||||
|         size=1, | ||||
|         seed=42, | ||||
|     ) | ||||
|     dataset = LetterJumbleDataset(config) | ||||
|     def setUp(self): | ||||
|         self.curriculum = LetterJumbleCurriculum() | ||||
|         self.exercise = LetterJumbleExercise() | ||||
|         self.rng = random.Random(42) | ||||
|         self.curriculum.rng = self.rng | ||||
|  | ||||
|     # Test with known word | ||||
|     word = "testing" | ||||
|     rng = Random(42) | ||||
|     scrambled = dataset._scramble_word(word, 0.5, rng) | ||||
|     def test_problem_structure(self): | ||||
|         """Test that generated problems have the correct structure""" | ||||
|         problem = self.exercise.generate(self.curriculum) | ||||
|  | ||||
|     # Verify scrambled word: | ||||
|     # - Has same length as original | ||||
|     assert len(scrambled) == len(word) | ||||
|     # - Contains same characters | ||||
|     assert sorted(scrambled) == sorted(word) | ||||
|     # - Is different from original (with high probability given 0.5 corruption) | ||||
|     assert scrambled != word | ||||
|         # Check basic structure | ||||
|         self.assertIn("question", problem) | ||||
|         self.assertIn("answer", problem) | ||||
|         self.assertIn("metadata", problem) | ||||
|  | ||||
|         # Check metadata structure | ||||
|         metadata = problem["metadata"] | ||||
|         self.assertEqual(metadata["type"], "direct") | ||||
|         self.assertIn("executed_parts", metadata) | ||||
|         executed_parts = metadata["executed_parts"] | ||||
|         self.assertIn("scrambled_words", executed_parts) | ||||
|         self.assertIn("original_words", executed_parts) | ||||
|  | ||||
| def test_letter_jumble_dataset_items(): | ||||
|     """Test basic properties of generated items""" | ||||
|     config = LetterJumbleConfig( | ||||
|         min_word_len=4, | ||||
|         max_word_len=8, | ||||
|         min_words=3, | ||||
|         max_words=5, | ||||
|         min_corruption_level=0.1, | ||||
|         max_corruption_level=0.3, | ||||
|         size=50, | ||||
|         seed=42, | ||||
|     ) | ||||
|     dataset = LetterJumbleDataset(config) | ||||
|     def test_word_length_ranges(self): | ||||
|         """Test that word lengths are within expected ranges""" | ||||
|         # Test all word length levels | ||||
|         level_max_lengths = {0: 5, 1: 8, 2: 64} | ||||
|  | ||||
|     for i in range(len(dataset)): | ||||
|         item = dataset[i] | ||||
|         for level, max_length in level_max_lengths.items(): | ||||
|             self.curriculum.set_attr_level("word_length", level) | ||||
|             problem = self.exercise.generate(self.curriculum) | ||||
|             words = problem["metadata"]["executed_parts"]["original_words"] | ||||
|             for word in words: | ||||
|                 self.assertLessEqual(len(word), max_length) | ||||
|                 self.assertGreaterEqual(len(word), 2)  # Min length is 2 | ||||
|  | ||||
|         # Check item structure | ||||
|         assert isinstance(item, dict) | ||||
|         assert "question" in item | ||||
|         assert "answer" in item | ||||
|         assert "metadata" in item | ||||
|     def test_word_count_ranges(self): | ||||
|         """Test that word counts are within expected ranges""" | ||||
|         # Test all word count levels | ||||
|         level_word_counts = {0: 3, 1: 5, 2: 20} | ||||
|  | ||||
|         # Check metadata | ||||
|         metadata = item["metadata"] | ||||
|         assert "num_words" in metadata | ||||
|         assert "corruption_level" in metadata | ||||
|         assert "scrambled_words" in metadata | ||||
|         assert "original_words" in metadata | ||||
|         for level, max_words in level_word_counts.items(): | ||||
|             self.curriculum.set_attr_level("num_words", level) | ||||
|             problem = self.exercise.generate(self.curriculum) | ||||
|             words = problem["metadata"]["executed_parts"]["original_words"] | ||||
|             self.assertLessEqual(len(words), max_words) | ||||
|             self.assertGreaterEqual(len(words), 1)  # Min words is 1 | ||||
|  | ||||
|         # Verify word counts | ||||
|         num_words = metadata["num_words"] | ||||
|         assert config.min_words <= num_words <= config.max_words | ||||
|         assert len(metadata["scrambled_words"]) == num_words | ||||
|         assert len(metadata["original_words"]) == num_words | ||||
| class TestLetterJumbleComprehensive(unittest.TestCase): | ||||
|     """Comprehensive tests for letter jumble""" | ||||
|  | ||||
|         # Verify corruption level | ||||
|         assert config.min_corruption_level <= metadata["corruption_level"] <= config.max_corruption_level | ||||
|     def setUp(self): | ||||
|         self.curriculum = LetterJumbleCurriculum() | ||||
|         self.exercise = LetterJumbleExercise() | ||||
|         self.rng = random.Random(42) | ||||
|         self.curriculum.rng = self.rng | ||||
|  | ||||
|         # Verify word properties | ||||
|         for word in metadata["original_words"]: | ||||
|             assert config.min_word_len <= len(word) <= config.max_word_len | ||||
|             assert word.isalpha() | ||||
|     def test_corruption_levels(self): | ||||
|         """Test different corruption levels""" | ||||
|         corruption_levels = [0.1, 0.3, 0.9] | ||||
|         num_samples = 100  # Test with multiple samples | ||||
|  | ||||
|         # Test each level | ||||
|         for level, expected_corruption in enumerate(corruption_levels): | ||||
|             self.curriculum.set_attr_level("corruption_level", level) | ||||
|             differences = [] | ||||
|  | ||||
| def test_letter_jumble_iteration(): | ||||
|     """Test that iteration respects dataset size""" | ||||
|     config = LetterJumbleConfig(size=5, seed=42) | ||||
|     dataset = LetterJumbleDataset(config) | ||||
|             # Generate multiple problems to measure average corruption | ||||
|             for _ in range(num_samples): | ||||
|                 problem = self.exercise.generate(self.curriculum) | ||||
|                 metadata = problem["metadata"]["executed_parts"] | ||||
|                 # Calculate character differences | ||||
|                 preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")] | ||||
|                 for orig, scrambled in zip(metadata["original_words"], metadata["scrambled_words"]): | ||||
|                     if len(orig) > preserve_len: | ||||
|                         diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b) | ||||
|                         differences.append(diff_count / len(orig)) | ||||
|  | ||||
|     items = list(dataset) | ||||
|     assert len(items) == config.size | ||||
|             # Check average corruption level is reasonable | ||||
|             # It's okay if actual corruption is lower than target due to: | ||||
|             # 1. Some swaps might cancel out previous swaps | ||||
|             # 2. The same characters might be swapped multiple times | ||||
|             # 3. The preserve_length attribute prevents some characters from being swapped | ||||
|             # 4. For short words, even a few swaps can make them readable | ||||
|             if differences: | ||||
|                 avg_corruption = sum(differences) / len(differences) | ||||
|                 # Only check that we don't exceed target by too much | ||||
|                 self.assertLess(avg_corruption, expected_corruption + 0.1, | ||||
|                               f"Corruption level {avg_corruption:.2f} too high (target: {expected_corruption:.2f})") | ||||
|                 # And ensure we have some corruption | ||||
|                 self.assertGreater(avg_corruption, 0.02, | ||||
|                                  f"Corruption level {avg_corruption:.2f} too low (should be above 0.02)") | ||||
|  | ||||
|     # Test multiple iterations yield same items | ||||
|     assert items == list(dataset) | ||||
|     def test_template_variation(self): | ||||
|         """Test that different templates are used""" | ||||
|         templates_seen = set() | ||||
|         num_samples = 100 | ||||
|  | ||||
|         for _ in range(num_samples): | ||||
|             problem = self.exercise.generate(self.curriculum) | ||||
|             templates_seen.add(problem["question"].split(":")[0]) | ||||
|  | ||||
|         self.assertGreater(len(templates_seen), 1, "Not enough template variation") | ||||
|  | ||||
|     def test_comprehensive_random_evaluation(self): | ||||
|         """Test random evaluation with various configurations and track statistics.""" | ||||
|         self.rng = random.Random(42)  # Fixed seed for reproducibility | ||||
|         self.curriculum.rng = self.rng | ||||
|  | ||||
|         # Track statistics | ||||
|         word_lengths = defaultdict(int) | ||||
|         word_counts = defaultdict(int) | ||||
|         corruption_levels = defaultdict(list) | ||||
|         consecutive_words_count = 0 | ||||
|         total_samples = 1000 | ||||
|  | ||||
|         # Generate test cases | ||||
|         for _ in range(total_samples): | ||||
|             # Set random attribute levels | ||||
|             for attr in self.curriculum.attributes: | ||||
|                 max_level = len(self.curriculum.attributes[attr].levels) - 1 | ||||
|                 self.curriculum.set_attr_level(attr, self.rng.randint(0, max_level)) | ||||
|  | ||||
|             # Generate and evaluate a random problem | ||||
|             problem = self.exercise.generate(self.curriculum) | ||||
|             metadata = problem["metadata"]["executed_parts"] | ||||
|             original_words = metadata["original_words"] | ||||
|             scrambled_words = metadata["scrambled_words"] | ||||
|  | ||||
|             # Track statistics | ||||
|             word_counts[len(original_words)] += 1 | ||||
|             for word in original_words: | ||||
|                 word_lengths[len(word)] += 1 | ||||
|  | ||||
|             # Calculate corruption levels | ||||
|             for orig, scrambled in zip(original_words, scrambled_words): | ||||
|                 preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")] | ||||
|                 if len(orig) > preserve_len: | ||||
|                     diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b) | ||||
|                     corruption_levels[len(orig)].append(diff_count / len(orig)) | ||||
|  | ||||
|             # Check if words are consecutive in source text | ||||
|             if len(original_words) > 1: | ||||
|                 text = " ".join(self.curriculum.words) | ||||
|                 phrase = " ".join(original_words) | ||||
|                 if phrase in text: | ||||
|                     consecutive_words_count += 1 | ||||
|  | ||||
|             # Verify scrambling is valid | ||||
|             for orig, scrambled in zip(original_words, scrambled_words): | ||||
|                 # Check lengths match | ||||
|                 self.assertEqual(len(orig), len(scrambled)) | ||||
|                 # Check same letters are used | ||||
|                 self.assertEqual(sorted(orig), sorted(scrambled)) | ||||
|  | ||||
|         # Print statistics | ||||
|         print("\nWord length distribution:") | ||||
|         for length, count in sorted(word_lengths.items()): | ||||
|             print(f"  Length {length}: {count}") | ||||
|  | ||||
|         print("\nWord count distribution:") | ||||
|         for count, freq in sorted(word_counts.items()): | ||||
|             print(f"  {count} words: {freq}") | ||||
|  | ||||
|         print("\nAverage corruption levels by word length:") | ||||
|         for length, levels in sorted(corruption_levels.items()): | ||||
|             avg = sum(levels) / len(levels) if levels else 0 | ||||
|             print(f"  Length {length}: {avg:.2f}") | ||||
|  | ||||
|         print(f"\nConsecutive words: {consecutive_words_count}/{total_samples}") | ||||
|  | ||||
|         # Verify statistical properties | ||||
|         self.assertTrue(any(length >= 8 for length in word_lengths),  | ||||
|                        "No long words generated") | ||||
|         self.assertTrue(any(count >= 3 for count in word_counts.values()), | ||||
|                        "Not enough variation in word counts") | ||||
|         self.assertTrue(consecutive_words_count > 0, | ||||
|                        "No consecutive words generated") | ||||
|         self.assertTrue(consecutive_words_count < total_samples, | ||||
|                        "Too many consecutive words") | ||||
|  | ||||
| # Run the unittest test runner when this module is executed as a script. | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
		Reference in New Issue
	
	Block a user
	 EduardDurech
					EduardDurech