From 6d6f8503ccc00a6bc6c362c6fdc7fce9857bbd85 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Fri, 16 May 2025 10:05:18 +0800
Subject: [PATCH] fix edge cases

---
 optillm/plugins/spl/evaluation.py | 26 +++++++++++++++++++++-----
 optillm/plugins/spl/main.py       |  2 +-
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/optillm/plugins/spl/evaluation.py b/optillm/plugins/spl/evaluation.py
index a05bcb2..87d07f8 100644
--- a/optillm/plugins/spl/evaluation.py
+++ b/optillm/plugins/spl/evaluation.py
@@ -21,17 +21,24 @@ from optillm.plugins.spl.config import (
 # Setup logging
 logger = logging.getLogger(__name__)
 
-def select_relevant_strategies(query: str, problem_type: str, db: Any, max_strategies: int = MAX_STRATEGIES_FOR_INFERENCE) -> List[Strategy]:
+def select_relevant_strategies(query: str, problem_type: str, db: Any, learning_mode: bool = False, max_strategies: int = MAX_STRATEGIES_FOR_INFERENCE) -> List[Strategy]:
     """
     Select the most relevant strategies for a given problem to be used during inference.
     This controls how many strategies are included in the system prompt augmentation.
     
-    Only selects strategies of the matching problem type with success rate >= MIN_SUCCESS_RATE_FOR_INFERENCE.
+    When in inference mode (not learning_mode), only strategies with:
+     - A matching problem type 
+     - Success rate >= MIN_SUCCESS_RATE_FOR_INFERENCE
+     - At least 5 attempts
+    are selected.
+    
+    In learning mode, strategies with fewer attempts are also considered.
     
     Args:
         query: The problem/query text
         problem_type: The type of problem
         db: Strategy database
+        learning_mode: Whether we're in learning mode (affects filtering criteria)
         max_strategies: Maximum number of strategies to return
     
     Returns:
@@ -41,13 +48,22 @@ def select_relevant_strategies(query: str, problem_type: str, db: Any, max_strat
     type_specific = db.get_strategies_for_problem(problem_type)
     logger.info(f"Found {len(type_specific)} strategies for problem type '{problem_type}'")
     
-    # Filter strategies by minimum success rate
+    # Filter strategies by minimum success rate and attempts
     qualified_strategies = []
     for strategy in type_specific:
-        if strategy.success_rate >= MIN_SUCCESS_RATE_FOR_INFERENCE or strategy.total_attempts < 5:
+        # In learning mode, we're more lenient with new strategies
+        if learning_mode and strategy.total_attempts < 5:
+            logger.info(f"Strategy {strategy.strategy_id} included (learning mode - only {strategy.total_attempts} attempts so far)")
+            qualified_strategies.append(strategy)
+        # For inference or well-tested strategies, we require minimum success rate
+        elif strategy.success_rate >= MIN_SUCCESS_RATE_FOR_INFERENCE and strategy.total_attempts >= 5:
+            logger.info(f"Strategy {strategy.strategy_id} qualified - success rate {strategy.success_rate:.2f} >= minimum {MIN_SUCCESS_RATE_FOR_INFERENCE:.2f} with {strategy.total_attempts} attempts")
             qualified_strategies.append(strategy)
         else:
-            logger.info(f"Strategy {strategy.strategy_id} skipped - success rate {strategy.success_rate:.2f} < minimum {MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}")
+            if strategy.total_attempts < 5:
+                logger.info(f"Strategy {strategy.strategy_id} skipped - insufficient attempts ({strategy.total_attempts} < 5) in inference mode")
+            else:
+                logger.info(f"Strategy {strategy.strategy_id} skipped - success rate {strategy.success_rate:.2f} < minimum {MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}")
     
     if not qualified_strategies:
         logger.info(f"No strategies meet the minimum success rate threshold ({MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}) for problem type '{problem_type}'")
diff --git a/optillm/plugins/spl/main.py b/optillm/plugins/spl/main.py
index 543ea13..a6e4ceb 100644
--- a/optillm/plugins/spl/main.py
+++ b/optillm/plugins/spl/main.py
@@ -119,7 +119,7 @@ def run_spl(system_prompt: str, initial_query: str, client, model: str, request_
     existing_strategies = db.get_strategies_for_problem(problem_type)
     
     # 6. Select relevant strategies for this problem (using inference limit)
-    selected_strategies = select_relevant_strategies(initial_query, problem_type, db, MAX_STRATEGIES_FOR_INFERENCE)
+    selected_strategies = select_relevant_strategies(initial_query, problem_type, db, learning_mode, MAX_STRATEGIES_FOR_INFERENCE)
     
     # Log the selected strategies
     for i, strategy in enumerate(selected_strategies, 1):