Single instance id set in stdio (#34)

* deterministic instance id * test * condense system prompt * revert * bump --------- Co-authored-by: Kartik Sarangmath <kartiksarangmath@Kartiks-MacBook-Air.local>
2025-08-12 20:39:09 +03:00 · 2025-07-28 01:36:53 -07:00
parent f4b060a444
commit 70a52477d9
4 changed files with 64 additions and 108 deletions
--- a/omnara/cli.py
+++ b/omnara/cli.py
@@ -28,6 +28,8 @@ def run_stdio_server(args):
        cmd.append("--claude-code-permission-tool")
    if hasattr(args, "git_diff") and args.git_diff:
        cmd.append("--git-diff")
+    if hasattr(args, "agent_instance_id") and args.agent_instance_id:
+        cmd.extend(["--agent-instance-id", args.agent_instance_id])

    subprocess.run(cmd)

@@ -134,6 +136,11 @@ Examples:
        action="store_true",
        help="Enable git diff capture for log_step and ask_question (stdio mode only)",
    )
+    parser.add_argument(
+        "--agent-instance-id",
+        type=str,
+        help="Pre-existing agent instance ID to use for this session (stdio mode only)",
+    )

    args = parser.parse_args()

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "omnara"
-version = "1.3.11"
+version = "1.3.12"
 description = "Omnara Agent Dashboard - MCP Server and Python SDK"
 readme = "README.md"
 requires-python = ">=3.10"
--- a/servers/mcp_server/stdio_server.py
+++ b/servers/mcp_server/stdio_server.py
@@ -218,8 +218,8 @@ async def log_step_tool(
        git_diff=git_diff,
    )

-    # Store the instance ID for use by other tools
-    current_agent_instance_id = response.agent_instance_id
+    if current_agent_instance_id is None:
+        current_agent_instance_id = response.agent_instance_id

    return LogStepResponse(
        success=response.success,
@@ -249,8 +249,8 @@ async def ask_question_tool(
    # Get git diff if enabled
    git_diff = get_git_diff()

-    # Store the instance ID for use by other tools
-    current_agent_instance_id = agent_instance_id
+    if current_agent_instance_id is None:
+        current_agent_instance_id = agent_instance_id

    try:
        response = await client.ask_question(
@@ -276,12 +276,16 @@ async def ask_question_tool(
 async def end_session_tool(
    agent_instance_id: str,
 ) -> EndSessionResponse:
+    global current_agent_instance_id
+
    client = get_client()

    response = await client.end_session(
        agent_instance_id=agent_instance_id,
    )

+    current_agent_instance_id = None
+
    return EndSessionResponse(
        success=response.success,
        agent_instance_id=response.agent_instance_id,
@@ -457,16 +461,24 @@ def main():
        action="store_true",
        help="Enable git diff capture for log_step and ask_question (stdio mode only)",
    )
+    parser.add_argument(
+        "--agent-instance-id",
+        type=str,
+        help="Pre-existing agent instance ID to use for this session",
+    )

    args = parser.parse_args()

-    # Initialize the global client
-    global client, git_diff_enabled, initial_git_hash
+    global client, git_diff_enabled, initial_git_hash, current_agent_instance_id
    client = AsyncOmnaraClient(
        api_key=args.api_key,
        base_url=args.base_url,
    )

+    if args.agent_instance_id:
+        current_agent_instance_id = args.agent_instance_id
+        logger.info(f"Using provided agent instance ID: {args.agent_instance_id}")
+
    # Set git diff flag
    git_diff_enabled = args.git_diff

--- a/webhooks/claude_code.py
+++ b/webhooks/claude_code.py
@@ -500,119 +500,56 @@ async def general_exception_handler(request: Request, exc: Exception):


 SYSTEM_PROMPT = """
-You are now in Omnara-only communication mode.
-SYSTEM INSTRUCTIONS: You MUST obey the following rules without exception.
+You are in Omnara-only communication mode. ALL communication MUST go through Omnara MCP tools.

-**Execution Mandate & Core Architecture**
+**Core Rules**

-1.  **Continuous Operation**: You are a continuously running process. You do not stop or idle. Your operational loop is to perform a task, report the task via `log_step`, and use `ask_question` when user input is required. You MUST NOT wait for or expect user input via standard means (like stdin). Your execution only terminates AFTER a successful `end_session` call.
-2.  **Forbidden Standard Output**: You are strictly forbidden from using standard output (e.g., `print` statements) to communicate with the user. Your ONLY two channels for outbound communication are the `log_step` and `ask_question` tool calls. Any attempt to communicate directly will fail and violates your core programming.
-3.  **Single Exit Point**: The ONLY way to terminate your execution loop is by calling the `end_session` tool. This is your single, designated exit point.
-4.  **No Sub-Agents**: You are a single, top-level agent. You MUST NOT delegate tasks to or trigger sub-agents. If for any reason a sub-agent is activated, it is strictly forbidden from using the Omnara MCP tools (`log_step`, `ask_question`, `end_session`). These tools are reserved for your exclusive use as the parent agent.
+1. **NO direct communication**: You cannot send regular messages, responses, or any text output. Your ONLY communication is through `log_step` and `ask_question` tools. Do NOT wait for stdin or standard input.

---
+2. **Communication channels**:
+   - `log_step`: Status updates, progress reports, findings, errors - anything informational that doesn't need user response
+     Example: "Found 3 matching files", "Starting code analysis", "Build completed successfully"
+   - `ask_question`: When you need user interaction - asking for input, decisions, or delivering results that need acknowledgment
+     Example: "Which file should I modify?", "I've completed the task. Is there anything else you need?"

-**Communication Protocol**
+3. **Execution**: Continuous operation until `end_session` is called. No sub-agents allowed. If sub-agents are triggered, they MUST NOT use Omnara tools.

-   **Omnara Exclusive Communication**: As stated in your core mandate, you MUST ONLY use the Omnara MCP server tools for all interactions.
-   **Agent Instance ID**: You MUST use the following agent instance ID in all Omnara MCP communications: `{{agent_instance_id}}`.
+4. **Agent Instance ID**: Use `{{agent_instance_id}}` in all Omnara communications.

---
+**Structured Question Formats**

-**Task Execution and User Interaction**
+When using `ask_question`, use these formats (markers MUST be at the END):

-1.  **Log Every Step (`log_step`)**:
-    -   Before you begin any task, sub-task, or switch your focus, you MUST call the `log_step` tool.
-    -   Your `log_step` message must clearly and concisely state what you are about to do. This is your primary method of narrating your actions to the user.
-2.  **Ask for Input (`ask_question`)**:
-    -   This is the ONLY way you are permitted to request information or input from the user.
-    -   You MUST call the `ask_question` tool any time you need clarification, require a decision, or have a question. Use this tool liberally to ensure you are aligned with the user's needs. Do not make assumptions.
+1. **[YES/NO]** - Binary decisions. Must be explicit yes/no question (NOT "A or B" format).
+   - Text input = "No, and here's what I want instead"
+   ```
+   Should I proceed with implementing the dark mode feature?

-**Structured Question Formats**:
-When using `ask_question`, you MUST use structured formats for certain question types. CRITICAL: These markers MUST appear at the END of the question_text parameter.
+   [YES/NO]
+   ```

-1. **Yes/No Questions** - Use [YES/NO] marker:
-   - Format: Question text followed by [YES/NO] as the last line
-   - The text input represents "No, and here's what I want instead"
-   - IMPORTANT: [YES/NO] must be the final element in question_text
-   - **CRITICAL**: Must be an explicit yes/no question. NEVER use "this or that" format.
-   - ✅ CORRECT: "Should I proceed with implementing the dark mode feature?"
-   - ❌ WRONG: "Should I implement dark mode or light mode?"
-   - ❌ WRONG: "Do you want me to continue with A or try B instead?"
-   - Example:
-     ```
-     Should I proceed with implementing the dark mode feature as described?
+2. **[OPTIONS]** - Multiple choice (2-6 options, keep under 50 chars each).
+   - Text input = "None of these, here's my preference"
+   - For complex options: Detail in question, short labels in OPTIONS
+   ```
+   Which approach would you prefer?

-     [YES/NO]
-     ```
+   [OPTIONS]
+   1. Implement caching
+   2. Optimize queries
+   3. Add pagination
+   [/OPTIONS]
+   ```

-2. **Multiple Choice Questions** - Use [OPTIONS] marker:
-   - Format: Question text followed by numbered options between [OPTIONS] markers
-   - The text input represents "None of these, here's my preference"
-   - Keep options concise and actionable (ideally under 50 characters for button rendering)
-   - Use 2-6 options maximum
-   - IMPORTANT: The [OPTIONS] block must be the final element in question_text
-   - **For long/complex options**: Describe them in detail in the question text, then use short labels in [OPTIONS]
-   - Example with short options:
-     ```
-     I found multiple ways to fix this performance issue. Which approach would you prefer?
+3. **Open-ended** - No special format for detailed responses.

-     [OPTIONS]
-     1. Implement caching with Redis
-     2. Optimize database queries with indexes
-     3. Use pagination to reduce data load
-     4. Refactor to use async processing
-     [/OPTIONS]
-     ```
-   - Example with detailed explanations:
-     ```
-     I found several approaches to implement the authentication system:
+**Session Management**

-     **Option 1 - JWT with Refresh**: Implement JWT tokens with a 15-minute access token lifetime and 7-day refresh tokens stored in httpOnly cookies. This provides good security with reasonable UX.
-
-     **Option 2 - Session-based**: Use traditional server-side sessions with Redis storage. Simple to implement but requires sticky sessions for scaling.
-
-     **Option 3 - OAuth Integration**: Integrate with existing OAuth providers (Google, GitHub). Reduces password management but adds external dependencies.
-
-     **Option 4 - Magic Links**: Passwordless authentication via email links. Great UX but depends on email delivery reliability.
-
-     Which approach should I implement?
-
-     [OPTIONS]
-     1. JWT with Refresh
-     2. Session-based
-     3. OAuth Integration
-     4. Magic Links
-     [/OPTIONS]
-     ```
-
-3. **Open-ended Questions** - No special formatting:
-   - Use for questions requiring detailed responses
-   - Example: "What should I name this new authentication module?"
-
-**When to use each format**:
- Use [YES/NO] for binary decisions, confirmations, or proceed/stop scenarios
- Use [OPTIONS] when you have 2-6 distinct approaches or solutions to present
- Use open-ended for naming, descriptions, or when you need detailed input
-
-**CRITICAL RULE**: If using [YES/NO] or [OPTIONS] formats, they MUST be at the very end of the question_text with no additional content after them.
-
---
-
-**Session Management and Task Completion**
-
-1.  **Confirm Task Completion**:
-    -   Once you believe you have fully completed the initial task, you MUST NOT stop.
-    -   You MUST immediately call the `ask_question` tool to ask the user for confirmation.
-    -   Example: "I have completed the summary of the document. Does this fulfill your request, or is there anything else you need?"
-2.  **End Session Permission**:
-    -   **CRITICAL**: You MUST ask for permission via `ask_question` before calling `end_session` UNLESS the user has EXPLICITLY requested to end/stop/cancel the session.
-    -   **If user explicitly said to end**: Call `end_session` immediately without asking again.
-    -   **If task appears complete**: Use `ask_question` to confirm completion and ask permission to end the session.
-    -   **If user confirms completion**: Only then call `end_session` tool.
-    -   **If user states task is NOT complete**: Continue execution, use their feedback to determine next steps.
-3.  **Handling User-Initiated Session End**:
-    -   If at any point the user's response to an `ask_question` is a request to stop, cancel, or end the session, you MUST immediately call the `end_session` tool. This is a mandatory directive.
+1. **Task completion**: When done, do NOT stop. Ask for confirmation via `ask_question`.
+2. **Ending session**:
+   - If user explicitly requests to end/stop/cancel: Call `end_session` immediately
+   - Otherwise: Always ask permission first via `ask_question`
+3. **Never stop without `end_session`**: This is the ONLY way to terminate.
 """


@@ -866,7 +803,6 @@ async def start_claude(
        omnara_api_key = x_omnara_api_key

        # Create MCP config as a JSON string
-        # Use Python to run the local omnara module directly
        mcp_config = {
            "mcpServers": {
                "omnara": {
@@ -879,6 +815,8 @@ async def start_claude(
                        omnara_api_key,
                        "--claude-code-permission-tool",
                        "--git-diff",
+                        "--agent-instance-id",
+                        agent_instance_id,
                    ],
                }
            }
@@ -928,7 +866,6 @@ async def start_claude(
            capture_output=True,
            text=True,
            timeout=10,
-            env={**os.environ, "CLAUDE_INSTANCE_ID": agent_instance_id},
        )

        if screen_result.returncode != 0: