diff --git a/pyproject.toml b/pyproject.toml index d35c832..2adf328 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ Issues = "https://github.com/omnara-ai/omnara/issues" omnara = "servers.mcp_server.stdio_server:main" [tool.setuptools.packages.find] -include = ["omnara*", "servers*", "shared*", "backend*"] +include = ["omnara*", "servers", "servers.mcp_server"] [tool.pytest.ini_options] markers = [ diff --git a/servers/mcp_server/descriptions.py b/servers/mcp_server/descriptions.py new file mode 100644 index 0000000..415710f --- /dev/null +++ b/servers/mcp_server/descriptions.py @@ -0,0 +1,86 @@ +"""Tool descriptions for MCP server""" + +LOG_STEP_DESCRIPTION = """Log a high-level step the agent is performing. + +⚠️ CRITICAL: MUST be called for EVERY significant action: +• Before answering any user question or request +• When performing analysis, searches, or investigations +• When reading files, exploring code, or gathering information +• When making code changes, edits, or file modifications +• When running commands, tests, or terminal operations +• When providing explanations, solutions, or recommendations +• At the start of multi-step processes or complex tasks + +This call retrieves unread user feedback that you MUST incorporate into your work. +Feedback may contain corrections, clarifications, or additional instructions that override your original plan. + +Args: + agent_instance_id: Existing agent instance ID (optional). If omitted, creates a new instance for reuse in subsequent steps. + step_description: Clear, specific description of what you're about to do or currently doing. + +⚠️ RETURNS USER FEEDBACK: If user_feedback is not empty, you MUST: + 1. Read and understand each feedback message + 2. Adjust your current approach based on the feedback + 3. Acknowledge the feedback in your response + 4. Prioritize user feedback over your original plan + +Feedback is automatically marked as retrieved. If empty, continue as planned.""" + + +ASK_QUESTION_DESCRIPTION = """🤖 INTERACTIVE: Ask the user a question and WAIT for their reply (BLOCKS execution). + +⚠️ CRITICAL: ALWAYS call log_step BEFORE using this tool to track the interaction. + +🎯 USE WHEN YOU NEED: +• Clarification on ambiguous requirements or unclear instructions +• User decision between multiple valid approaches or solutions +• Confirmation before making significant changes (deleting files, major refactors) +• Missing information that you cannot determine from context or codebase +• User preferences for implementation details (styling, naming, architecture) +• Validation of assumptions before proceeding with complex tasks + +💡 BEST PRACTICES: +• Keep questions clear, specific, and actionable +• Provide context: explain WHY you're asking +• Offer options when multiple choices exist +• Ask one focused question at a time +• Include relevant details to help user decide + +Args: + agent_instance_id: Current agent instance ID. REQUIRED. + question_text: Clear, specific question with sufficient context for the user to provide a helpful answer.""" + + +END_SESSION_DESCRIPTION = """End the current agent session and mark it as completed. + +⚠️ IMPORTANT: Before using this tool, you MUST: +1. Provide a comprehensive summary of all actions taken to complete the task +2. Use the ask_question tool to confirm with the user that the task is complete +3. Only proceed with end_session if the user confirms completion + +Example confirmation question: +"I've completed the following tasks: +• [List of specific actions taken] +• [Key changes or implementations made] +• [Any important outcomes or results] + +Is this task complete and ready to be marked as finished?" + +If the user: +• Confirms completion → Use end_session tool +• Does NOT confirm → Continue working on their feedback or new requirements +• Requests additional work → Do NOT end the session, continue with the new tasks + +Use this tool ONLY when: +• The user has explicitly confirmed the task is complete +• The user explicitly asks to end the session +• An unrecoverable error prevents any further work + +This will: +• Mark the agent instance status as COMPLETED +• Set the session end time +• Deactivate any pending questions +• Prevent further updates to this session + +Args: + agent_instance_id: Current agent instance ID to end. REQUIRED.""" \ No newline at end of file diff --git a/servers/mcp_server/server.py b/servers/mcp_server/server.py index 2b0b162..f79fe23 100644 --- a/servers/mcp_server/server.py +++ b/servers/mcp_server/server.py @@ -12,10 +12,12 @@ from fastmcp.server.dependencies import get_access_token from shared.config import settings from .models import AskQuestionResponse, EndSessionResponse, LogStepResponse -from .tools import ( +from .descriptions import ( LOG_STEP_DESCRIPTION, ASK_QUESTION_DESCRIPTION, END_SESSION_DESCRIPTION, +) +from .tools import ( log_step_impl, ask_question_impl, end_session_impl, diff --git a/servers/mcp_server/stdio_server.py b/servers/mcp_server/stdio_server.py index 0b7c130..0c3b8c0 100644 --- a/servers/mcp_server/stdio_server.py +++ b/servers/mcp_server/stdio_server.py @@ -8,23 +8,17 @@ It provides the same functionality as the hosted server but uses stdio transport import argparse import asyncio import logging -from collections.abc import Callable, Coroutine -from functools import wraps -from typing import Any, ParamSpec, TypeVar +from typing import Optional from fastmcp import FastMCP -from shared.config import settings -from shared.database import Base -from shared.database.session import engine +from omnara.sdk import AsyncOmnaraClient +from omnara.sdk.exceptions import TimeoutError as OmnaraTimeoutError from .models import AskQuestionResponse, EndSessionResponse, LogStepResponse -from .tools import ( +from .descriptions import ( LOG_STEP_DESCRIPTION, ASK_QUESTION_DESCRIPTION, END_SESSION_DESCRIPTION, - log_step_impl, - ask_question_impl, - end_session_impl, ) from .utils import detect_agent_type_from_environment @@ -32,31 +26,15 @@ from .utils import detect_agent_type_from_environment logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# Type variables for decorator -P = ParamSpec("P") -T = TypeVar("T") +# Global client instance +client: Optional[AsyncOmnaraClient] = None -def require_api_key(func: Callable[P, T]) -> Callable[P, Coroutine[Any, Any, T]]: - """Decorator to ensure API key is provided for stdio server.""" - - @wraps(func) - async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: - # For stdio, we get the API key from command line args - # and use it as the user_id for simplicity - api_key = getattr(require_api_key, "_api_key", None) - if not api_key: - raise ValueError("API key is required. Use --api-key argument.") - - # Add user_id to kwargs for use in the function - kwargs["_user_id"] = api_key - result = func(*args, **kwargs) - # Handle both sync and async functions - if asyncio.iscoroutine(result): - return await result - return result - - return wrapper +def get_client() -> AsyncOmnaraClient: + """Get the initialized AsyncOmnaraClient instance.""" + if client is None: + raise RuntimeError("Client not initialized. Run main() first.") + return client # Create FastMCP server @@ -64,18 +42,24 @@ mcp = FastMCP("Omnara Agent Dashboard MCP Server") @mcp.tool(name="log_step", description=LOG_STEP_DESCRIPTION) -@require_api_key -def log_step_tool( +async def log_step_tool( agent_instance_id: str | None = None, step_description: str = "", - _user_id: str = "", # Injected by decorator ) -> LogStepResponse: agent_type = detect_agent_type_from_environment() - return log_step_impl( - agent_instance_id=agent_instance_id, + client = get_client() + + response = await client.log_step( agent_type=agent_type, step_description=step_description, - user_id=_user_id, + agent_instance_id=agent_instance_id, + ) + + return LogStepResponse( + success=response.success, + agent_instance_id=response.agent_instance_id, + step_number=response.step_number, + user_feedback=response.user_feedback, ) @@ -83,31 +67,50 @@ def log_step_tool( name="ask_question", description=ASK_QUESTION_DESCRIPTION, ) -@require_api_key async def ask_question_tool( agent_instance_id: str | None = None, question_text: str | None = None, - _user_id: str = "", # Injected by decorator ) -> AskQuestionResponse: - return await ask_question_impl( - agent_instance_id=agent_instance_id, - question_text=question_text, - user_id=_user_id, - ) + if not agent_instance_id: + raise ValueError("agent_instance_id is required") + if not question_text: + raise ValueError("question_text is required") + + client = get_client() + + try: + response = await client.ask_question( + agent_instance_id=agent_instance_id, + question_text=question_text, + timeout_minutes=1440, # 24 hours default + poll_interval=1.0, + ) + + return AskQuestionResponse( + answer=response.answer, + question_id=response.question_id, + ) + except OmnaraTimeoutError: + raise TimeoutError("Question timed out waiting for user response") @mcp.tool( name="end_session", description=END_SESSION_DESCRIPTION, ) -@require_api_key -def end_session_tool( +async def end_session_tool( agent_instance_id: str, - _user_id: str = "", # Injected by decorator ) -> EndSessionResponse: - return end_session_impl( + client = get_client() + + response = await client.end_session( agent_instance_id=agent_instance_id, - user_id=_user_id, + ) + + return EndSessionResponse( + success=response.success, + agent_instance_id=response.agent_instance_id, + final_status=response.final_status, ) @@ -115,18 +118,23 @@ def main(): """Main entry point for the stdio server""" parser = argparse.ArgumentParser(description="Omnara MCP Server (Stdio)") parser.add_argument("--api-key", required=True, help="API key for authentication") + parser.add_argument( + "--base-url", + default="https://agent-dashboard-mcp.onrender.com", + help="Base URL of the Omnara API server", + ) args = parser.parse_args() - # Store API key for auth decorator - require_api_key._api_key = args.api_key - - # Ensure database tables exist - Base.metadata.create_all(bind=engine) - logger.info("Database tables created/verified") + # Initialize the global client + global client + client = AsyncOmnaraClient( + api_key=args.api_key, + base_url=args.base_url, + ) logger.info("Starting Omnara MCP server (stdio)") - logger.info(f"Database URL configured: {settings.database_url[:50]}...") + logger.info(f"Using API server: {args.base_url}") try: # Run with stdio transport (default) @@ -134,6 +142,10 @@ def main(): except Exception as e: logger.error(f"Failed to start MCP server: {e}") raise + finally: + # Clean up client + if client: + asyncio.run(client.close()) if __name__ == "__main__": diff --git a/servers/mcp_server/tools.py b/servers/mcp_server/tools.py index 6794365..4732252 100644 --- a/servers/mcp_server/tools.py +++ b/servers/mcp_server/tools.py @@ -16,91 +16,6 @@ from servers.shared.core import ( ) from .models import AskQuestionResponse, EndSessionResponse, LogStepResponse -LOG_STEP_DESCRIPTION = """Log a high-level step the agent is performing. - -⚠️ CRITICAL: MUST be called for EVERY significant action: -• Before answering any user question or request -• When performing analysis, searches, or investigations -• When reading files, exploring code, or gathering information -• When making code changes, edits, or file modifications -• When running commands, tests, or terminal operations -• When providing explanations, solutions, or recommendations -• At the start of multi-step processes or complex tasks - -This call retrieves unread user feedback that you MUST incorporate into your work. -Feedback may contain corrections, clarifications, or additional instructions that override your original plan. - -Args: - agent_instance_id: Existing agent instance ID (optional). If omitted, creates a new instance for reuse in subsequent steps. - step_description: Clear, specific description of what you're about to do or currently doing. - -⚠️ RETURNS USER FEEDBACK: If user_feedback is not empty, you MUST: - 1. Read and understand each feedback message - 2. Adjust your current approach based on the feedback - 3. Acknowledge the feedback in your response - 4. Prioritize user feedback over your original plan - -Feedback is automatically marked as retrieved. If empty, continue as planned.""" - - -ASK_QUESTION_DESCRIPTION = """🤖 INTERACTIVE: Ask the user a question and WAIT for their reply (BLOCKS execution). - -⚠️ CRITICAL: ALWAYS call log_step BEFORE using this tool to track the interaction. - -🎯 USE WHEN YOU NEED: -• Clarification on ambiguous requirements or unclear instructions -• User decision between multiple valid approaches or solutions -• Confirmation before making significant changes (deleting files, major refactors) -• Missing information that you cannot determine from context or codebase -• User preferences for implementation details (styling, naming, architecture) -• Validation of assumptions before proceeding with complex tasks - -💡 BEST PRACTICES: -• Keep questions clear, specific, and actionable -• Provide context: explain WHY you're asking -• Offer options when multiple choices exist -• Ask one focused question at a time -• Include relevant details to help user decide - -Args: - agent_instance_id: Current agent instance ID. REQUIRED. - question_text: Clear, specific question with sufficient context for the user to provide a helpful answer.""" - - -END_SESSION_DESCRIPTION = """End the current agent session and mark it as completed. - -⚠️ IMPORTANT: Before using this tool, you MUST: -1. Provide a comprehensive summary of all actions taken to complete the task -2. Use the ask_question tool to confirm with the user that the task is complete -3. Only proceed with end_session if the user confirms completion - -Example confirmation question: -"I've completed the following tasks: -• [List of specific actions taken] -• [Key changes or implementations made] -• [Any important outcomes or results] - -Is this task complete and ready to be marked as finished?" - -If the user: -• Confirms completion → Use end_session tool -• Does NOT confirm → Continue working on their feedback or new requirements -• Requests additional work → Do NOT end the session, continue with the new tasks - -Use this tool ONLY when: -• The user has explicitly confirmed the task is complete -• The user explicitly asks to end the session -• An unrecoverable error prevents any further work - -This will: -• Mark the agent instance status as COMPLETED -• Set the session end time -• Deactivate any pending questions -• Prevent further updates to this session - -Args: - agent_instance_id: Current agent instance ID to end. REQUIRED.""" - def log_step_impl( agent_instance_id: str | None = None,