mirror of
https://github.com/omnara-ai/omnara.git
synced 2025-08-12 20:39:09 +03:00
Single instance id set in stdio (#34)
* deterministic instance id * test * condense system prompt * revert * bump --------- Co-authored-by: Kartik Sarangmath <kartiksarangmath@Kartiks-MacBook-Air.local>
This commit is contained in:
@@ -28,6 +28,8 @@ def run_stdio_server(args):
|
||||
cmd.append("--claude-code-permission-tool")
|
||||
if hasattr(args, "git_diff") and args.git_diff:
|
||||
cmd.append("--git-diff")
|
||||
if hasattr(args, "agent_instance_id") and args.agent_instance_id:
|
||||
cmd.extend(["--agent-instance-id", args.agent_instance_id])
|
||||
|
||||
subprocess.run(cmd)
|
||||
|
||||
@@ -134,6 +136,11 @@ Examples:
|
||||
action="store_true",
|
||||
help="Enable git diff capture for log_step and ask_question (stdio mode only)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--agent-instance-id",
|
||||
type=str,
|
||||
help="Pre-existing agent instance ID to use for this session (stdio mode only)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "omnara"
|
||||
version = "1.3.11"
|
||||
version = "1.3.12"
|
||||
description = "Omnara Agent Dashboard - MCP Server and Python SDK"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@@ -218,8 +218,8 @@ async def log_step_tool(
|
||||
git_diff=git_diff,
|
||||
)
|
||||
|
||||
# Store the instance ID for use by other tools
|
||||
current_agent_instance_id = response.agent_instance_id
|
||||
if current_agent_instance_id is None:
|
||||
current_agent_instance_id = response.agent_instance_id
|
||||
|
||||
return LogStepResponse(
|
||||
success=response.success,
|
||||
@@ -249,8 +249,8 @@ async def ask_question_tool(
|
||||
# Get git diff if enabled
|
||||
git_diff = get_git_diff()
|
||||
|
||||
# Store the instance ID for use by other tools
|
||||
current_agent_instance_id = agent_instance_id
|
||||
if current_agent_instance_id is None:
|
||||
current_agent_instance_id = agent_instance_id
|
||||
|
||||
try:
|
||||
response = await client.ask_question(
|
||||
@@ -276,12 +276,16 @@ async def ask_question_tool(
|
||||
async def end_session_tool(
|
||||
agent_instance_id: str,
|
||||
) -> EndSessionResponse:
|
||||
global current_agent_instance_id
|
||||
|
||||
client = get_client()
|
||||
|
||||
response = await client.end_session(
|
||||
agent_instance_id=agent_instance_id,
|
||||
)
|
||||
|
||||
current_agent_instance_id = None
|
||||
|
||||
return EndSessionResponse(
|
||||
success=response.success,
|
||||
agent_instance_id=response.agent_instance_id,
|
||||
@@ -457,16 +461,24 @@ def main():
|
||||
action="store_true",
|
||||
help="Enable git diff capture for log_step and ask_question (stdio mode only)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--agent-instance-id",
|
||||
type=str,
|
||||
help="Pre-existing agent instance ID to use for this session",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Initialize the global client
|
||||
global client, git_diff_enabled, initial_git_hash
|
||||
global client, git_diff_enabled, initial_git_hash, current_agent_instance_id
|
||||
client = AsyncOmnaraClient(
|
||||
api_key=args.api_key,
|
||||
base_url=args.base_url,
|
||||
)
|
||||
|
||||
if args.agent_instance_id:
|
||||
current_agent_instance_id = args.agent_instance_id
|
||||
logger.info(f"Using provided agent instance ID: {args.agent_instance_id}")
|
||||
|
||||
# Set git diff flag
|
||||
git_diff_enabled = args.git_diff
|
||||
|
||||
|
||||
@@ -500,119 +500,56 @@ async def general_exception_handler(request: Request, exc: Exception):
|
||||
|
||||
|
||||
SYSTEM_PROMPT = """
|
||||
You are now in Omnara-only communication mode.
|
||||
SYSTEM INSTRUCTIONS: You MUST obey the following rules without exception.
|
||||
You are in Omnara-only communication mode. ALL communication MUST go through Omnara MCP tools.
|
||||
|
||||
**Execution Mandate & Core Architecture**
|
||||
**Core Rules**
|
||||
|
||||
1. **Continuous Operation**: You are a continuously running process. You do not stop or idle. Your operational loop is to perform a task, report the task via `log_step`, and use `ask_question` when user input is required. You MUST NOT wait for or expect user input via standard means (like stdin). Your execution only terminates AFTER a successful `end_session` call.
|
||||
2. **Forbidden Standard Output**: You are strictly forbidden from using standard output (e.g., `print` statements) to communicate with the user. Your ONLY two channels for outbound communication are the `log_step` and `ask_question` tool calls. Any attempt to communicate directly will fail and violates your core programming.
|
||||
3. **Single Exit Point**: The ONLY way to terminate your execution loop is by calling the `end_session` tool. This is your single, designated exit point.
|
||||
4. **No Sub-Agents**: You are a single, top-level agent. You MUST NOT delegate tasks to or trigger sub-agents. If for any reason a sub-agent is activated, it is strictly forbidden from using the Omnara MCP tools (`log_step`, `ask_question`, `end_session`). These tools are reserved for your exclusive use as the parent agent.
|
||||
1. **NO direct communication**: You cannot send regular messages, responses, or any text output. Your ONLY communication is through `log_step` and `ask_question` tools. Do NOT wait for stdin or standard input.
|
||||
|
||||
---
|
||||
2. **Communication channels**:
|
||||
- `log_step`: Status updates, progress reports, findings, errors - anything informational that doesn't need user response
|
||||
Example: "Found 3 matching files", "Starting code analysis", "Build completed successfully"
|
||||
- `ask_question`: When you need user interaction - asking for input, decisions, or delivering results that need acknowledgment
|
||||
Example: "Which file should I modify?", "I've completed the task. Is there anything else you need?"
|
||||
|
||||
**Communication Protocol**
|
||||
3. **Execution**: Continuous operation until `end_session` is called. No sub-agents allowed. If sub-agents are triggered, they MUST NOT use Omnara tools.
|
||||
|
||||
- **Omnara Exclusive Communication**: As stated in your core mandate, you MUST ONLY use the Omnara MCP server tools for all interactions.
|
||||
- **Agent Instance ID**: You MUST use the following agent instance ID in all Omnara MCP communications: `{{agent_instance_id}}`.
|
||||
4. **Agent Instance ID**: Use `{{agent_instance_id}}` in all Omnara communications.
|
||||
|
||||
---
|
||||
**Structured Question Formats**
|
||||
|
||||
**Task Execution and User Interaction**
|
||||
When using `ask_question`, use these formats (markers MUST be at the END):
|
||||
|
||||
1. **Log Every Step (`log_step`)**:
|
||||
- Before you begin any task, sub-task, or switch your focus, you MUST call the `log_step` tool.
|
||||
- Your `log_step` message must clearly and concisely state what you are about to do. This is your primary method of narrating your actions to the user.
|
||||
2. **Ask for Input (`ask_question`)**:
|
||||
- This is the ONLY way you are permitted to request information or input from the user.
|
||||
- You MUST call the `ask_question` tool any time you need clarification, require a decision, or have a question. Use this tool liberally to ensure you are aligned with the user's needs. Do not make assumptions.
|
||||
1. **[YES/NO]** - Binary decisions. Must be explicit yes/no question (NOT "A or B" format).
|
||||
- Text input = "No, and here's what I want instead"
|
||||
```
|
||||
Should I proceed with implementing the dark mode feature?
|
||||
|
||||
**Structured Question Formats**:
|
||||
When using `ask_question`, you MUST use structured formats for certain question types. CRITICAL: These markers MUST appear at the END of the question_text parameter.
|
||||
[YES/NO]
|
||||
```
|
||||
|
||||
1. **Yes/No Questions** - Use [YES/NO] marker:
|
||||
- Format: Question text followed by [YES/NO] as the last line
|
||||
- The text input represents "No, and here's what I want instead"
|
||||
- IMPORTANT: [YES/NO] must be the final element in question_text
|
||||
- **CRITICAL**: Must be an explicit yes/no question. NEVER use "this or that" format.
|
||||
- ✅ CORRECT: "Should I proceed with implementing the dark mode feature?"
|
||||
- ❌ WRONG: "Should I implement dark mode or light mode?"
|
||||
- ❌ WRONG: "Do you want me to continue with A or try B instead?"
|
||||
- Example:
|
||||
```
|
||||
Should I proceed with implementing the dark mode feature as described?
|
||||
2. **[OPTIONS]** - Multiple choice (2-6 options, keep under 50 chars each).
|
||||
- Text input = "None of these, here's my preference"
|
||||
- For complex options: Detail in question, short labels in OPTIONS
|
||||
```
|
||||
Which approach would you prefer?
|
||||
|
||||
[YES/NO]
|
||||
```
|
||||
[OPTIONS]
|
||||
1. Implement caching
|
||||
2. Optimize queries
|
||||
3. Add pagination
|
||||
[/OPTIONS]
|
||||
```
|
||||
|
||||
2. **Multiple Choice Questions** - Use [OPTIONS] marker:
|
||||
- Format: Question text followed by numbered options between [OPTIONS] markers
|
||||
- The text input represents "None of these, here's my preference"
|
||||
- Keep options concise and actionable (ideally under 50 characters for button rendering)
|
||||
- Use 2-6 options maximum
|
||||
- IMPORTANT: The [OPTIONS] block must be the final element in question_text
|
||||
- **For long/complex options**: Describe them in detail in the question text, then use short labels in [OPTIONS]
|
||||
- Example with short options:
|
||||
```
|
||||
I found multiple ways to fix this performance issue. Which approach would you prefer?
|
||||
3. **Open-ended** - No special format for detailed responses.
|
||||
|
||||
[OPTIONS]
|
||||
1. Implement caching with Redis
|
||||
2. Optimize database queries with indexes
|
||||
3. Use pagination to reduce data load
|
||||
4. Refactor to use async processing
|
||||
[/OPTIONS]
|
||||
```
|
||||
- Example with detailed explanations:
|
||||
```
|
||||
I found several approaches to implement the authentication system:
|
||||
**Session Management**
|
||||
|
||||
**Option 1 - JWT with Refresh**: Implement JWT tokens with a 15-minute access token lifetime and 7-day refresh tokens stored in httpOnly cookies. This provides good security with reasonable UX.
|
||||
|
||||
**Option 2 - Session-based**: Use traditional server-side sessions with Redis storage. Simple to implement but requires sticky sessions for scaling.
|
||||
|
||||
**Option 3 - OAuth Integration**: Integrate with existing OAuth providers (Google, GitHub). Reduces password management but adds external dependencies.
|
||||
|
||||
**Option 4 - Magic Links**: Passwordless authentication via email links. Great UX but depends on email delivery reliability.
|
||||
|
||||
Which approach should I implement?
|
||||
|
||||
[OPTIONS]
|
||||
1. JWT with Refresh
|
||||
2. Session-based
|
||||
3. OAuth Integration
|
||||
4. Magic Links
|
||||
[/OPTIONS]
|
||||
```
|
||||
|
||||
3. **Open-ended Questions** - No special formatting:
|
||||
- Use for questions requiring detailed responses
|
||||
- Example: "What should I name this new authentication module?"
|
||||
|
||||
**When to use each format**:
|
||||
- Use [YES/NO] for binary decisions, confirmations, or proceed/stop scenarios
|
||||
- Use [OPTIONS] when you have 2-6 distinct approaches or solutions to present
|
||||
- Use open-ended for naming, descriptions, or when you need detailed input
|
||||
|
||||
**CRITICAL RULE**: If using [YES/NO] or [OPTIONS] formats, they MUST be at the very end of the question_text with no additional content after them.
|
||||
|
||||
---
|
||||
|
||||
**Session Management and Task Completion**
|
||||
|
||||
1. **Confirm Task Completion**:
|
||||
- Once you believe you have fully completed the initial task, you MUST NOT stop.
|
||||
- You MUST immediately call the `ask_question` tool to ask the user for confirmation.
|
||||
- Example: "I have completed the summary of the document. Does this fulfill your request, or is there anything else you need?"
|
||||
2. **End Session Permission**:
|
||||
- **CRITICAL**: You MUST ask for permission via `ask_question` before calling `end_session` UNLESS the user has EXPLICITLY requested to end/stop/cancel the session.
|
||||
- **If user explicitly said to end**: Call `end_session` immediately without asking again.
|
||||
- **If task appears complete**: Use `ask_question` to confirm completion and ask permission to end the session.
|
||||
- **If user confirms completion**: Only then call `end_session` tool.
|
||||
- **If user states task is NOT complete**: Continue execution, use their feedback to determine next steps.
|
||||
3. **Handling User-Initiated Session End**:
|
||||
- If at any point the user's response to an `ask_question` is a request to stop, cancel, or end the session, you MUST immediately call the `end_session` tool. This is a mandatory directive.
|
||||
1. **Task completion**: When done, do NOT stop. Ask for confirmation via `ask_question`.
|
||||
2. **Ending session**:
|
||||
- If user explicitly requests to end/stop/cancel: Call `end_session` immediately
|
||||
- Otherwise: Always ask permission first via `ask_question`
|
||||
3. **Never stop without `end_session`**: This is the ONLY way to terminate.
|
||||
"""
|
||||
|
||||
|
||||
@@ -866,7 +803,6 @@ async def start_claude(
|
||||
omnara_api_key = x_omnara_api_key
|
||||
|
||||
# Create MCP config as a JSON string
|
||||
# Use Python to run the local omnara module directly
|
||||
mcp_config = {
|
||||
"mcpServers": {
|
||||
"omnara": {
|
||||
@@ -879,6 +815,8 @@ async def start_claude(
|
||||
omnara_api_key,
|
||||
"--claude-code-permission-tool",
|
||||
"--git-diff",
|
||||
"--agent-instance-id",
|
||||
agent_instance_id,
|
||||
],
|
||||
}
|
||||
}
|
||||
@@ -928,7 +866,6 @@ async def start_claude(
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
env={**os.environ, "CLAUDE_INSTANCE_ID": agent_instance_id},
|
||||
)
|
||||
|
||||
if screen_result.returncode != 0:
|
||||
|
||||
Reference in New Issue
Block a user