Refactor context budget control return values and fix misnamed chunking attribute

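Refactor context budget control so that every return path yields the same 4-tuple (selected_contexts, used_indices, dynamic_ratio, demonstrations_sort), including the empty-input and fits-within-budget early returns, which previously returned only three values. Rename the misnamed attribute `ppl_chunking` to `entropy_chunking`.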
2025-10-22 23:19:46 +03:00 · 2025-10-21 10:28:30 +08:00
parent 71248b737f
commit 22d21b9b23
1 changed file with 8 additions and 4 deletions
--- a/experiments/module-summarization/code_compressor.py
+++ b/experiments/module-summarization/code_compressor.py
@@ -213,7 +213,7 @@ class CodeCompressor:
        self.load_model(model_name, device_map, model_config)
        
        logger.debug("Initializing Entropy chunking...")
-        self.ppl_chunking = EntropyChunking()
+        self.entropy_chunking = EntropyChunking()
        
        # Add caching system for model outputs and token information
        self.cache = {
@@ -576,7 +576,9 @@ class CodeCompressor:
        start_time = time.time()
        
        if not context_list:
-            return [], [], []
+            # Always return a 4-tuple: (selected_contexts, used_indices, dynamic_ratio, demonstrations_sort)
+            # Keep API consistent for callers that unpack 4 values
+            return [], [], [], []
        
        # Get token counts for each context
        logger.debug("Calculating token lengths for contexts")
@@ -588,7 +590,9 @@ class CodeCompressor:
            logger.debug(f"All contexts fit within budget ({total_tokens} <= {target_token})")
            end_time = time.time()
            logger.debug(f"Context budget control completed in {end_time - start_time:.2f} seconds")
-            return context_list, list(range(len(context_list))), [0.0] * len(context_list)
+            # Build a default demonstrations_sort with zero scores to preserve structure
+            demonstrations_sort = list(zip(range(len(context_list)), [0.0] * len(context_list)))
+            return context_list, list(range(len(context_list))), [0.0] * len(context_list), demonstrations_sort
        
        # Rank contexts by relevance if question is provided
        logger.debug("Ranking contexts by relevance")
@@ -1883,4 +1887,4 @@ if __name__ == "__main__":
        min_lines_for_fine_grained=5,
        importance_beta=0.5
    )
-    logger.info(f"Compressed code (using {result_cond['fine_grained_method_used']}): \n{result_cond['compressed_code']}")
+    logger.info(f"Compressed code (using {result_cond['fine_grained_method_used']}): \n{result_cond['compressed_code']}")