# mcphost-api/serve_mcphost_openai_compatible.py
import asyncio
import json
import sys
import threading
import time
import uuid
from contextlib import asynccontextmanager
from functools import wraps
from typing import List, Optional

import pexpect
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from loguru import logger
from pydantic import BaseModel

from helpers.response_cleaners import clean_response
from settings import settings

class Config:
    """Configuration constants for MCPHost management"""

    SPAWN_TIMEOUT = 60
    ECHO_DELAY = 0.5
    READ_TIMEOUT = 0.1
    RESPONSE_WAIT_TIME = 1
    CHUNK_SIZE = 1000
    MAX_READ_SIZE = 10000
    PROMPT_INDICATOR = "Enter your prompt"

def log_performance(func):
    """Decorator to log function performance (sync- and async-aware)"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        logger.debug("Starting {}", func.__name__)
        try:
            result = func(*args, **kwargs)
            logger.debug("{} completed in {:.2f}s", func.__name__, time.time() - start_time)
            return result
        except Exception as e:
            logger.error("{} failed after {:.2f}s: {}", func.__name__, time.time() - start_time, str(e))
            raise

    @wraps(func)
    async def async_wrapper(*args, **kwargs):
        start_time = time.time()
        logger.debug("Starting {}", func.__name__)
        try:
            result = await func(*args, **kwargs)
            logger.debug("{} completed in {:.2f}s", func.__name__, time.time() - start_time)
            return result
        except Exception as e:
            logger.error("{} failed after {:.2f}s: {}", func.__name__, time.time() - start_time, str(e))
            raise

    # A plain wrapper around a coroutine function would only time coroutine
    # creation, not execution, so pick the wrapper that matches func.
    return async_wrapper if asyncio.iscoroutinefunction(func) else wrapper

# Configure loguru
logger.remove()
logger.add(
    sys.stderr,
    level="DEBUG" if settings.debug else "INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
    colorize=True,
    backtrace=True,
    diagnose=True,
)

class MCPHostManager:
    """Manages MCP process lifecycle and communication"""

    def __init__(self):
        self.child: Optional[pexpect.spawn] = None
        self.config = Config()
        self.lock = threading.Lock()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.shutdown()
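
    # The context-manager protocol above also allows standalone use, e.g.
    # (illustrative; this module instead drives start()/shutdown() from the
    # FastAPI lifespan handler further down):
    #
    #     with MCPHostManager() as manager:
    #         print(manager.send_prompt("hello"))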

    @log_performance
    def start(self) -> bool:
        """Start the mcphost process"""
        command = self._build_command()
        logger.info("Starting mcphost: {}", ' '.join(command))
        try:
            self.child = pexpect.spawn(
                ' '.join(command),
                timeout=self.config.SPAWN_TIMEOUT,
                encoding='utf-8'
            )
            self.child.setecho(False)
            return self._wait_for_ready()
        except Exception as e:
            logger.error("Error starting mcphost: {}", e)
            logger.exception("Full traceback:")
            return False

    @log_performance
    async def send_prompt_async(self, prompt: str) -> str:
        """Send a prompt to mcphost and get the response (async wrapper)"""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.send_prompt, prompt)

    def send_prompt(self, prompt: str) -> str:
        """Send a prompt to mcphost and get the response"""
        with self.lock:
            if not self._is_alive():
                logger.warning("MCPHost not running, attempting to restart...")
                if not self.start():
                    return "Error: Failed to restart MCPHost"
            try:
                self._clear_pending_output()
                self._send_command(prompt)
                response = self._collect_response()
                return clean_response(response, prompt)
            except Exception as e:
                logger.exception("Exception in send_prompt: {}", str(e))
                return f"Error: {str(e)}"

    def shutdown(self):
        """Shutdown mcphost gracefully"""
        if self._is_alive():
            logger.info("Shutting down mcphost...")
            self.child.sendcontrol('c')
            self.child.close()
            logger.info("MCPHost shutdown complete")

    def _is_alive(self) -> bool:
        """Check if the process is running"""
        return self.child is not None and self.child.isalive()

    def _build_command(self) -> List[str]:
        """Build the command to start mcphost"""
        command = [
            settings.mcphost_path,
            '--config', settings.mcphost_config,
            '--model', settings.mcphost_model,
            '--openai-url', settings.openai_url,
            '--openai-api-key', settings.openai_api_key
        ]
        if settings.debug:
            command.insert(1, '--debug')
        return command
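
    # A command this builds might look like the following; the concrete values
    # come from settings and are placeholders here:
    #
    #     mcphost --debug --config <mcphost_config> --model <mcphost_model> \
    #         --openai-url <openai_url> --openai-api-key <openai_api_key>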

    def _wait_for_ready(self) -> bool:
        """Wait for the process to be ready"""
        try:
            self.child.expect(self.config.PROMPT_INDICATOR)
            logger.success("MCPHost started and ready")
            self._clear_buffer()
            return True
        except Exception as e:
            logger.error("Error waiting for prompt: {}", e)
            return False

    def _clear_buffer(self):
        """Clear any remaining output in the buffer"""
        time.sleep(self.config.ECHO_DELAY)
        try:
            self.child.read_nonblocking(
                size=self.config.MAX_READ_SIZE,
                timeout=self.config.READ_TIMEOUT
            )
        except (pexpect.TIMEOUT, pexpect.EOF):
            pass

    def _clear_pending_output(self):
        """Clear any pending output from the process"""
        try:
            self.child.read_nonblocking(
                size=self.config.MAX_READ_SIZE,
                timeout=self.config.READ_TIMEOUT
            )
        except (pexpect.TIMEOUT, pexpect.EOF):
            pass

    def _send_command(self, prompt: str):
        """Send a command to the process"""
        logger.debug("Sending prompt: {}", prompt)
        self.child.send(prompt)
        self.child.send('\r')
        # Wait for the model to process
        time.sleep(self.config.RESPONSE_WAIT_TIME)

    def _collect_response(self) -> str:
        """Collect response from the process"""
        response = ""
        response_complete = False
        with logger.catch(message="Error during response collection"):
            while not response_complete:
                try:
                    chunk = self.child.read_nonblocking(
                        size=self.config.CHUNK_SIZE,
                        timeout=3
                    )
                    if chunk:
                        response += chunk
                        logger.trace("Received chunk: {}", chunk[:50] + "..." if len(chunk) > 50 else chunk)
                        if self.config.PROMPT_INDICATOR in chunk:
                            response_complete = True
                    else:
                        break
                except pexpect.TIMEOUT:
                    if response and self.config.PROMPT_INDICATOR in response:
                        response_complete = True
                    elif response:
                        logger.debug("Waiting for more response data...")
                        time.sleep(1)
                        continue
                    else:
                        break
                except Exception as e:
                    logger.error("Error reading response: {}", e)
                    break
        logger.debug("Collected response length: {} characters", len(response))
        return response

# Configuration for available models
AVAILABLE_MODELS = [
    {
        "id": "mcphost-model",
        "object": "model",
        "created": 1686935002,
        "owned_by": "mcphost",
        "permission": [
            {
                "id": "modelperm-mcphost-" + str(uuid.uuid4())[:8],
                "object": "model_permission",
                "created": int(time.time()),
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": True,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False
            }
        ],
        "root": "mcphost-model",
        "parent": None
    },
]

class ChatMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    stream: Optional[bool] = False
    max_tokens: Optional[int] = None
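
# A request body this schema accepts (note that temperature and max_tokens are
# validated but currently ignored by process_with_mcphost below):
#
#     {"model": "mcphost-model",
#      "messages": [{"role": "user", "content": "hi"}],
#      "stream": false}
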
# Initialize the MCPHost manager
mcp_manager = MCPHostManager()

async def process_with_mcphost(messages: List[ChatMessage], model: str) -> str:
    """Process messages using MCPHost"""
    # Get the last user message
    last_user_message = next((msg.content for msg in reversed(messages) if msg.role == "user"), "")
    if not last_user_message:
        return "No user message found"
    # Send to MCPHost and get response
    response = await mcp_manager.send_prompt_async(last_user_message)
    return response


def generate_id() -> str:
    return str(uuid.uuid4())[:8]

async def stream_response(content: str, model: str):
    words = content.split()
    for word in words:
        chunk = {
            "id": f"chatcmpl-{generate_id()}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "index": 0,
                "delta": {
                    "content": word + " "
                },
                "finish_reason": None
            }]
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        await asyncio.sleep(0.1)
    final_chunk = {
        "id": f"chatcmpl-{generate_id()}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "stop"
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan"""
    # Startup
    logger.info("=" * 50)
    logger.info("MCPHost OpenAI-compatible API Server v1.0")
    logger.info("Debug Mode: {}", "ON" if settings.debug else "OFF")
    logger.info("=" * 50)
    if not mcp_manager.start():
        logger.error("Failed to start MCPHost")
        # You might want to exit or handle this differently
    else:
        logger.success("MCPHost started successfully")
    yield
    # Shutdown
    logger.info("Shutting down MCPHost...")
    mcp_manager.shutdown()
    logger.success("Shutdown complete")


app = FastAPI(title="MCPHost OpenAI-compatible API", lifespan=lifespan)
@app.get("/v1/models")
async def list_models():
"""List all available models"""
return {
"object": "list",
"data": AVAILABLE_MODELS
}
@app.get("/v1/models/{model_id}")
async def get_model(model_id: str):
"""Get details of a specific model"""
model = next((m for m in AVAILABLE_MODELS if m["id"] == model_id), None)
if not model:
raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
return model
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
# Validate model exists
if not any(model["id"] == request.model for model in AVAILABLE_MODELS):
raise HTTPException(status_code=404, detail=f"Model {request.model} not found")
response_content = await process_with_mcphost(request.messages, request.model)
if not request.stream:
return {
"id": f"chatcmpl-{generate_id()}",
"object": "chat.completion",
"created": int(time.time()),
"model": request.model,
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": response_content
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": len(" ".join([msg.content for msg in request.messages]).split()),
"completion_tokens": len(response_content.split()),
"total_tokens": len(" ".join([msg.content for msg in request.messages]).split()) + len(
response_content.split())
}
}
else:
return StreamingResponse(
stream_response(response_content, request.model),
media_type="text/event-stream"
)
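
# Example request against this endpoint (host/port assume the uvicorn defaults
# in the __main__ block below):
#
#     curl -s http://localhost:8000/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"model": "mcphost-model", "messages": [{"role": "user", "content": "hi"}]}'
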

# Optional: Add a root endpoint that redirects to documentation
@app.get("/")
async def root():
    return {"message": "MCPHost OpenAI-compatible API server. Visit /docs for documentation."}


# Optional: Add a health check endpoint
@app.get("/health")
async def health_check():
    return {"status": "healthy", "mcphost_alive": mcp_manager._is_alive()}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
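
# Usage sketch with the official openai Python client (assumes the defaults
# above; the server performs no authentication, so any api_key string works):
#
#     from openai import OpenAI
#
#     client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")
#     resp = client.chat.completions.create(
#         model="mcphost-model",
#         messages=[{"role": "user", "content": "Hello"}],
#     )
#     print(resp.choices[0].message.content)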