update openai utils and response cleaners

This commit is contained in:
TCUDIKEL
2025-05-11 00:41:52 +03:00
parent 1b7dff9523
commit 47d506efb4
3 changed files with 155 additions and 62 deletions

View File

@@ -1,46 +0,0 @@
import time
import uuid
from typing import List, Optional
from pydantic import BaseModel
class ChatMessage(BaseModel):
    """A single message in a chat conversation (OpenAI chat format)."""
    # Speaker role — presumably "system" / "user" / "assistant"; not validated here.
    role: str
    # The message text.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body for an OpenAI-style /chat/completions call."""
    # Target model identifier.
    model: str
    # Conversation history — presumably oldest-first; confirm against callers.
    messages: List[ChatMessage]
    # Sampling temperature; defaults to 1.0 as in the OpenAI API.
    temperature: Optional[float] = 1.0
    # Presumably enables streamed (chunked) responses when True.
    stream: Optional[bool] = False
    # Cap on generated tokens; None means no explicit limit.
    max_tokens: Optional[int] = None
# Configuration for available models, served by an OpenAI-compatible
# /models endpoint.
AVAILABLE_MODELS = [
    {
        "id": "mcphost-model",
        "object": "model",
        # NOTE(review): hard-coded creation timestamp, while the permission
        # entry below uses int(time.time()) — confirm this is intentional.
        "created": 1686935002,
        "owned_by": "mcphost",
        "permission": [
            {
                # Fixed typo: the prefix was "modelcphost-"; use the
                # project's "mcphost-" prefix consistently.
                "id": "mcphost-" + str(uuid.uuid4())[:8],
                "object": "model_permission",
                "created": int(time.time()),
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": True,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False
            }
        ],
        "root": "mcphost-model",
        "parent": None
    },
]

View File

@@ -2,8 +2,53 @@ import json
import time
import uuid
import asyncio
import time
import uuid
from typing import List, Optional
from pydantic import BaseModel
class ChatMessage(BaseModel):
    """A single message in a chat conversation (OpenAI chat format)."""
    # Speaker role — presumably "system" / "user" / "assistant"; not validated here.
    role: str
    # The message text.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body for an OpenAI-style /chat/completions call."""
    # Target model identifier.
    model: str
    # Conversation history — presumably oldest-first; confirm against callers.
    messages: List[ChatMessage]
    # Sampling temperature; defaults to 1.0 as in the OpenAI API.
    temperature: Optional[float] = 1.0
    # Presumably enables streamed (chunked) responses when True.
    stream: Optional[bool] = False
    # Cap on generated tokens; None means no explicit limit.
    max_tokens: Optional[int] = None
# Configuration for available models, served by an OpenAI-compatible
# /models endpoint.
AVAILABLE_MODELS = [
    {
        "id": "mcphost-model",
        "object": "model",
        # NOTE(review): hard-coded creation timestamp, while the permission
        # entry below uses int(time.time()) — confirm this is intentional.
        "created": 1686935002,
        "owned_by": "mcphost",
        "permission": [
            {
                # Fixed typo: the prefix was "modelcphost-"; use the
                # project's "mcphost-" prefix consistently.
                "id": "mcphost-" + str(uuid.uuid4())[:8],
                "object": "model_permission",
                "created": int(time.time()),
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": True,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False
            }
        ],
        "root": "mcphost-model",
        "parent": None
    },
]
def generate_id() -> str:
    """Return a short unique identifier: the first 8 hex chars of a UUID4."""
    return uuid.uuid4().hex[:8]

View File

@@ -7,12 +7,30 @@ class Config:
# Patterns for cleaning mcphost TUI/debug output.
# NOTE(review): this hunk contained both the pre- and post-commit assignments
# of SKIP_PATTERNS and DEBUG_LOG_PATTERN (diff residue); only the updated
# (post-commit) versions are kept here.
ANSI_PATTERN = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
# NOTE(review): empty string — the border glyph was probably lost during text
# extraction; confirm the real TUI border character (e.g. '│') in the original.
TUI_BORDER = ''
# Updated patterns to match the actual output format
SKIP_PATTERNS = [
    'alt+enter / ctrl+j new line',
    'ctrl+e open editor',
    'enter submit',
    'ctrl+c to quit',
    'Enter your prompt',
    'Type /help for commands'
]
# Matches log lines such as:
# "2025/05/11 00:38:45 INFO <cmd/root.go:495> Model loaded..."
DEBUG_LOG_PATTERN = re.compile(r'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} \w+ <.*?>.*$', re.MULTILINE)
# Braille "Thinking..." spinner frames (if used)
THINKING_SPINNER_PATTERN = re.compile(r'[⣽⢿⡿⣟⣯⣷⣾⣻] Thinking\.\.\.')
# Markers and indicators
ASSISTANT_MARKER = "Assistant:"
PROMPT_INDICATOR = "Enter your prompt"
# Keyboard-control hints rendered at the bottom of the TUI
CONTROL_HINTS_PATTERN = re.compile(r'(alt\+enter|ctrl\+[a-z]).*?(•|$)', re.IGNORECASE)
def clean_response(response: str, original_prompt: str) -> str:
"""Clean and format MCP response"""
@@ -20,7 +38,7 @@ def clean_response(response: str, original_prompt: str) -> str:
return ""
# Debug log the raw response
logger.debug(f"Raw response before cleaning: {response}")
logger.debug(f"Raw response before cleaning: {response[:500]}...")
# Remove ANSI escape sequences
response = Config.ANSI_PATTERN.sub('', response)
@@ -64,49 +82,125 @@ def clean_assistant_section(assistant_response: str) -> str:
if Config.THINKING_SPINNER_PATTERN.search(line):
continue
# Skip control hints
if Config.CONTROL_HINTS_PATTERN.search(line):
continue
# Handle TUI borders
if stripped.startswith(Config.TUI_BORDER):
content = stripped.strip(Config.TUI_BORDER).strip()
if content:
content = line.lstrip(Config.TUI_BORDER).strip()
# Only add if there's actual content after the border and it's not a skip pattern
if content and not any(skip_pattern.lower() in content.lower() for skip_pattern in Config.SKIP_PATTERNS):
cleaned_lines.append(content)
else:
cleaned_lines.append(stripped)
# Add if not a skip pattern
if not any(skip_pattern.lower() in stripped.lower() for skip_pattern in Config.SKIP_PATTERNS):
cleaned_lines.append(stripped)
return '\n'.join(cleaned_lines).strip()
def clean_entire_response(response: str, original_prompt: str) -> str:
    """Clean the entire response when no Assistant: marker is found.

    Strips debug log lines, the echoed prompt, spinner frames, control hints,
    and TUI border decoration, preserving blank lines only once real content
    has started.

    NOTE(review): this hunk contained interleaved pre- and post-commit lines
    (diff residue); this body reconstructs the post-commit revision.
    """
    # First remove all debug log lines (pattern is MULTILINE, so one pass).
    response = Config.DEBUG_LOG_PATTERN.sub('', response)
    lines = response.split('\n')
    cleaned_lines = []
    # Flag to track if we've started seeing actual content
    content_started = False
    for line in lines:
        stripped = line.strip()
        # Skip empty lines before content starts; keep them afterwards.
        if not stripped:
            if content_started:
                cleaned_lines.append('')
            continue
        # Skip lines that match the original prompt (echoed back by the TUI).
        if stripped == original_prompt:
            content_started = True
            continue
        # Skip thinking spinner lines
        if Config.THINKING_SPINNER_PATTERN.search(line):
            continue
        # Skip control hints
        if Config.CONTROL_HINTS_PATTERN.search(line):
            continue
        # Handle TUI decorations
        if line.startswith(Config.TUI_BORDER):
            content = line.lstrip(Config.TUI_BORDER).strip()
            # Skip if it matches any skip pattern
            if any(skip_pattern.lower() in content.lower() for skip_pattern in Config.SKIP_PATTERNS):
                continue
            # Skip if it's empty or just whitespace
            if not content:
                continue
            # Skip if it's the original prompt
            if content == original_prompt:
                content_started = True
                continue
            # If we have actual content, add it
            content_started = True
            cleaned_lines.append(content)
            continue
        # Skip navigation hints and other skip patterns
        if any(skip_pattern.lower() in line.lower() for skip_pattern in Config.SKIP_PATTERNS):
            continue
        # Add non-empty, non-decoration lines
        content_started = True
        cleaned_lines.append(stripped)
    # Clean up any trailing empty lines
    while cleaned_lines and not cleaned_lines[-1]:
        cleaned_lines.pop()
    return '\n'.join(cleaned_lines).strip()
def remove_tui_artifacts(text: str) -> str:
    """Remove any remaining TUI artifacts from the text.

    Lines that are only a TUI border glyph (or empty) are treated as
    decoration: consecutive runs collapse to a single blank line.
    """
    result = []
    for raw in text.split('\n'):
        stripped = raw.strip()
        if stripped in (Config.TUI_BORDER, ''):
            # Preserve at most one blank line between content lines.
            if result and result[-1] != '':
                result.append('')
            continue
        result.append(raw)
    return '\n'.join(result).strip()
def post_process_response(response: str) -> str:
    """Final post-processing of the cleaned response."""
    # Remove any remaining TUI artifacts
    response = remove_tui_artifacts(response)
    # Collapse runs of 3+ newlines down to a single blank line
    response = re.sub(r'\n{3,}', '\n\n', response)
    # Remove any remaining debug artifacts that slipped through.
    # Fixed: the alternation was unparenthesized (r'DEBUG|INFO|ERROR|WARN.*?>'),
    # which deleted any bare "DEBUG"/"INFO"/"ERROR" word from legitimate
    # content. Grouping the level names makes the match always require the
    # trailing "...>" source tag from the log format.
    response = re.sub(r'(?:DEBUG|INFO|ERROR|WARN).*?>', '', response)
    return response.strip()
# Public entry point: cleaning plus final post-processing in one call.
def clean_mcphost_response(response: str, original_prompt: str) -> str:
    """Main entry point for cleaning MCPHost responses"""
    return post_process_response(clean_response(response, original_prompt))