From 22d21b9b23af88369f4b40f6b616d623c3b20b58 Mon Sep 17 00:00:00 2001
From: STEVENTAN100 <93769514+STEVENTAN100@users.noreply.github.com>
Date: Tue, 21 Oct 2025 10:28:30 +0800
Subject: [PATCH 1/2] Refactor context budget control return values and correct
 attribute name error

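Refactor context budget control so that its early-return paths also
return a 4-tuple (selected_contexts, used_indices, dynamic_ratio,
demonstrations_sort), keeping the API consistent for callers that unpack
four values. Rename the `ppl_chunking` attribute to `entropy_chunking` to
fix the attribute-name error.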
---
 experiments/module-summarization/code_compressor.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/experiments/module-summarization/code_compressor.py b/experiments/module-summarization/code_compressor.py
index 96d1cd6..ba3afb6 100644
--- a/experiments/module-summarization/code_compressor.py
+++ b/experiments/module-summarization/code_compressor.py
@@ -213,7 +213,7 @@ class CodeCompressor:
         self.load_model(model_name, device_map, model_config)
         
         logger.debug("Initializing Entropy chunking...")
-        self.ppl_chunking = EntropyChunking()
+        self.entropy_chunking = EntropyChunking()
         
         # Add caching system for model outputs and token information
         self.cache = {
@@ -576,7 +576,9 @@ class CodeCompressor:
         start_time = time.time()
         
         if not context_list:
-            return [], [], []
+            # Always return a 4-tuple: (selected_contexts, used_indices, dynamic_ratio, demonstrations_sort)
+            # Keep API consistent for callers that unpack 4 values
+            return [], [], [], []
         
         # Get token counts for each context
         logger.debug("Calculating token lengths for contexts")
@@ -588,7 +590,9 @@ class CodeCompressor:
             logger.debug(f"All contexts fit within budget ({total_tokens} <= {target_token})")
             end_time = time.time()
             logger.debug(f"Context budget control completed in {end_time - start_time:.2f} seconds")
-            return context_list, list(range(len(context_list))), [0.0] * len(context_list)
+            # Build a default demonstrations_sort with zero scores to preserve structure
+            demonstrations_sort = list(zip(range(len(context_list)), [0.0] * len(context_list)))
+            return context_list, list(range(len(context_list))), [0.0] * len(context_list), demonstrations_sort
         
         # Rank contexts by relevance if question is provided
         logger.debug("Ranking contexts by relevance")
@@ -1883,4 +1887,4 @@ if __name__ == "__main__":
         min_lines_for_fine_grained=5,
         importance_beta=0.5
     )
-    logger.info(f"Compressed code (using {result_cond['fine_grained_method_used']}): \n{result_cond['compressed_code']}")
\ No newline at end of file
+    logger.info(f"Compressed code (using {result_cond['fine_grained_method_used']}): \n{result_cond['compressed_code']}")

From 5478b117c8d17870f338697495d9f522a3711812 Mon Sep 17 00:00:00 2001
From: STEVENTAN100 <93769514+STEVENTAN100@users.noreply.github.com>
Date: Tue, 21 Oct 2025 10:32:54 +0800
Subject: [PATCH 2/2] Add gpu_memory_utilization to engine arguments

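Pass `gpu_memory_utilization=0.8` to the vLLM engine arguments in
`LLMGenerator` and `LLMScorer` to avoid running out of GPU memory.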
---
 experiments/module-summarization/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/experiments/module-summarization/main.py b/experiments/module-summarization/main.py
index 260c26b..9e8f4a3 100644
--- a/experiments/module-summarization/main.py
+++ b/experiments/module-summarization/main.py
@@ -19,7 +19,7 @@ from itertools import cycle
 class LLMGenerator:
     def __init__(self, model_name, device, **model_args):
         # Create a vllm LLM instance
-        engine_args = EngineArgs(model=model_name, device=device, **model_args)
+        engine_args = EngineArgs(model=model_name, gpu_memory_utilization=0.8, device=device, **model_args)
         self.model = LLM(**vars(engine_args))
         self.model_name = model_name
         self.device = device
@@ -58,7 +58,7 @@ class LLMGenerator:
 class LLMScorer:
     def __init__(self, model_name, device, **model_args):
         # Create a vllm LLM instance
-        engine_args = EngineArgs(model=model_name, device=device, **model_args)
+        engine_args = EngineArgs(model=model_name, gpu_memory_utilization=0.8, device=device, **model_args)
         self.model = LLM(**vars(engine_args))
         self.model_name = model_name
         self.device = device
@@ -1315,4 +1315,4 @@ def run_documentation_task(
 
 if __name__ == "__main__":
 
-    fire.Fire(run_documentation_task)
\ No newline at end of file
+    fire.Fire(run_documentation_task)