90 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import json
 | |
| import time
 | |
| import uuid
 | |
| import asyncio
 | |
| import time
 | |
| import uuid
 | |
| from typing import List, Optional
 | |
| from pydantic import BaseModel
 | |
| 
 | |
| 
 | |
class ChatMessage(BaseModel):
    """A single chat message in OpenAI chat-completions format."""

    role: str  # e.g. "system", "user", "assistant" -- not validated here
    content: str  # the message text
 | |
| 
 | |
| 
 | |
class ChatCompletionRequest(BaseModel):
    """Request body for an OpenAI-compatible /v1/chat/completions endpoint."""

    model: str  # requested model id
    messages: List[ChatMessage]  # conversation history, oldest first
    temperature: Optional[float] = 1.0  # sampling temperature (OpenAI default)
    stream: Optional[bool] = False  # if True, respond with SSE chunks
    max_tokens: Optional[int] = None  # None = server-side default cap
 | |
| 
 | |
| 
 | |
# Configuration for available models
# Static catalog served by a /v1/models-style endpoint, shaped like OpenAI's
# "model" objects (one hard-coded entry).
AVAILABLE_MODELS = [
    {
        "id": "mcphost-model",
        "object": "model",
        "created": 1686935002,  # fixed creation timestamp (epoch seconds)
        "owned_by": "mcphost",
        "permission": [
            {
                # NOTE(review): "modelcphost-" looks like a typo -- OpenAI
                # permission ids use a "modelperm-" prefix; confirm intent.
                "id": "modelcphost-" + str(uuid.uuid4())[:8],
                "object": "model_permission",
                # evaluated once at import time, not per request
                "created": int(time.time()),
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": True,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False
            }
        ],
        "root": "mcphost-model",
        "parent": None
    },
]
 | |
| 
 | |
def generate_id() -> str:
    """Return a short random identifier: the first 8 hex digits of a UUID4."""
    return uuid.uuid4().hex[:8]
 | |
| 
 | |
| 
 | |
async def stream_response(content: str, model: str, *, completion_id: Optional[str] = None):
    """Stream *content* word-by-word as OpenAI chat.completion.chunk SSE events.

    Args:
        content: Full response text. It is split on whitespace and emitted one
            word per chunk; original whitespace (newlines, runs of spaces) is
            collapsed, and every word carries a single trailing space.
        model: Model id echoed back in each chunk.
        completion_id: Optional pre-assigned stream id ("chatcmpl-...").
            Generated when omitted. All chunks of one stream share this id.

    Yields:
        ``"data: {json}\\n\\n"`` strings -- one per word, then a terminal chunk
        with ``finish_reason="stop"``, then the ``data: [DONE]`` sentinel.
    """
    # Fix: OpenAI streams use ONE id/created pair for the entire completion;
    # the original generated a fresh id and timestamp for every chunk.
    if completion_id is None:
        completion_id = f"chatcmpl-{generate_id()}"
    created = int(time.time())

    for word in content.split():
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [{
                "index": 0,
                "delta": {
                    "content": word + " "
                },
                "finish_reason": None
            }]
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        await asyncio.sleep(0.1)  # pacing delay between chunks

    # Terminal chunk: empty delta with finish_reason "stop", then the sentinel.
    final_chunk = {
        "id": completion_id,
        "object": "chat.completion.chunk",
        "created": created,
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "stop"
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"
