90 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import json
 | |
| import time
 | |
| import uuid
 | |
| import asyncio
 | |
| import time
 | |
| import uuid
 | |
| from typing import List, Optional
 | |
| from pydantic import BaseModel
 | |
| 
 | |
| 
 | |
class ChatMessage(BaseModel):
    """A single chat message in OpenAI chat-completions format."""

    role: str  # e.g. "system", "user", "assistant" -- not validated here
    content: str  # the message text
 | |
| 
 | |
| 
 | |
class ChatCompletionRequest(BaseModel):
    """Request body for an OpenAI-compatible /v1/chat/completions endpoint."""

    model: str  # requested model id
    messages: List[ChatMessage]  # conversation history, oldest first
    temperature: Optional[float] = 1.0  # sampling temperature (OpenAI default)
    stream: Optional[bool] = False  # if True, respond with SSE chunks
    max_tokens: Optional[int] = None  # None = server-side default cap
 | |
| 
 | |
| 
 | |
# Configuration for available models
# Static catalog served by a /v1/models-style endpoint, shaped like OpenAI's
# "model" objects (one hard-coded entry).
AVAILABLE_MODELS = [
    {
        "id": "mcphost-model",
        "object": "model",
        "created": 1686935002,  # fixed creation timestamp (epoch seconds)
        "owned_by": "mcphost",
        "permission": [
            {
                # NOTE(review): "modelcphost-" looks like a typo -- OpenAI
                # permission ids use a "modelperm-" prefix; confirm intent.
                "id": "modelcphost-" + str(uuid.uuid4())[:8],
                "object": "model_permission",
                # evaluated once at import time, not per request
                "created": int(time.time()),
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": True,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False
            }
        ],
        "root": "mcphost-model",
        "parent": None
    },
]
 | |
| 
 | |
def generate_id() -> str:
    """Return a short random identifier: the first 8 hex digits of a UUID4."""
    return uuid.uuid4().hex[:8]
 | |
| 
 | |
| 
 | |
async def stream_response(content: str, model: str, *, completion_id: Optional[str] = None):
    """Stream *content* word-by-word as OpenAI chat.completion.chunk SSE events.

    Args:
        content: Full response text. It is split on whitespace and emitted one
            word per chunk; original whitespace (newlines, runs of spaces) is
            collapsed, and every word carries a single trailing space.
        model: Model id echoed back in each chunk.
        completion_id: Optional pre-assigned stream id ("chatcmpl-...").
            Generated when omitted. All chunks of one stream share this id.

    Yields:
        ``"data: {json}\\n\\n"`` strings -- one per word, then a terminal chunk
        with ``finish_reason="stop"``, then the ``data: [DONE]`` sentinel.
    """
    # Fix: OpenAI streams use ONE id/created pair for the entire completion;
    # the original generated a fresh id and timestamp for every chunk.
    if completion_id is None:
        completion_id = f"chatcmpl-{generate_id()}"
    created = int(time.time())

    for word in content.split():
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [{
                "index": 0,
                "delta": {
                    "content": word + " "
                },
                "finish_reason": None
            }]
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        await asyncio.sleep(0.1)  # pacing delay between chunks

    # Terminal chunk: empty delta with finish_reason "stop", then the sentinel.
    final_chunk = {
        "id": completion_id,
        "object": "chat.completion.chunk",
        "created": created,
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "stop"
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"
