feat: add a reference mcp servers (#5)

Signed-off-by: simon-mo <simon.mo@hey.com>
2025-08-06 00:55:46 +03:00 · 2025-08-05 10:41:01 -07:00
parent 71a363c21d
commit c8f0d136cc
6 changed files with 345 additions and 0 deletions
--- a/gpt-oss-mcp-server/README.md
+++ b/gpt-oss-mcp-server/README.md
@@ -0,0 +1,29 @@
+# MCP Servers for gpt-oss reference tools
+
+This directory contains MCP servers for the reference tools in the [gpt-oss](https://github.com/openai/gpt-oss) repository.
+You can set up these tools behind MCP servers and use them in your applications. 
+For inference services that integrate with MCP, you can also use these as reference tools.
+
+In particular, this directory contains a `build-system-prompt.py` script that will generate exactly the same system prompt as `reference-system-prompt.py`.
+The `build-system-prompt.py` script showcases all the care needed to automatically discover the tools and construct the system prompt before feeding it into Harmony.
+
+## Usage
+
+```bash
+# Install the dependencies
+uv pip install -r requirements.txt
+```
+
+```bash
+# Assume we have harmony and gpt-oss installed
+uv pip install mcp[cli]
+# start the servers
+mcp run -t sse browser_server.py:mcp
+mcp run -t sse python_server.py:mcp
+```
+
+You can now use the MCP Inspector to play with the tools.
+Once it is open, set the SSE URL to `http://localhost:8001/sse` for the browser server or `http://localhost:8000/sse` for the Python server.
+
+To compare the system prompt and see how to construct it via MCP service discovery, see `build-system-prompt.py`. 
+This script will generate exactly the same system prompt as `reference-system-prompt.py`.
--- a/gpt-oss-mcp-server/browser_server.py
+++ b/gpt-oss-mcp-server/browser_server.py
@@ -0,0 +1,114 @@
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import Union, Optional
+
+from mcp.server.fastmcp import Context, FastMCP
+from gpt_oss.tools.simple_browser import SimpleBrowserTool
+from gpt_oss.tools.simple_browser.backend import ExaBackend
+
+
+@dataclass
+class AppContext:
+    """Holds one SimpleBrowserTool per session id."""
+
+    # Maps a session id to the browser tool created for it.
+    browsers: dict[str, SimpleBrowserTool] = field(default_factory=dict)
+
+    def create_or_get_browser(self, session_id: str) -> SimpleBrowserTool:
+        """Return the browser stored for `session_id`, creating it on first use."""
+        browser = self.browsers.get(session_id)
+        if browser is None:
+            # Nothing stored yet for this id: build a browser on an Exa
+            # backend configured with source="web" and remember it.
+            browser = SimpleBrowserTool(backend=ExaBackend(source="web"))
+            self.browsers[session_id] = browser
+        return browser
+
+    def remove_browser(self, session_id: str) -> None:
+        """Drop the browser stored for `session_id`; unknown ids are ignored."""
+        if session_id in self.browsers:
+            del self.browsers[session_id]
+
+
+@asynccontextmanager
+async def app_lifespan(_server: FastMCP) -> AsyncIterator[AppContext]:
+    """Yield a fresh, empty AppContext.
+
+    The server argument is unused, and nothing runs after the yield, so
+    there is no explicit teardown step.
+    """
+    context = AppContext()
+    yield context
+
+
+# Instructions text for the browser server. `.strip()` removes the newline
+# that follows the opening quotes and the one that precedes the closing quotes.
+_BROWSER_INSTRUCTIONS = r"""
+Tool for browsing.
+The `cursor` appears in brackets before each browsing display: `[{cursor}]`.
+Cite information from the tool using the following format:
+`【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`. 
+Do not quote more than 10 words directly from the tool output.
+sources=web
+""".strip()
+
+# The "browser" server: `app_lifespan` is passed as its lifespan and it is
+# configured with port 8001.
+mcp = FastMCP(
+    name="browser",
+    instructions=_BROWSER_INSTRUCTIONS,
+    lifespan=app_lifespan,
+    port=8001,
+)
+
+
+@mcp.tool(
+    name="search",
+    title="Search for information",
+    description=
+    "Searches for information related to `query` and displays `topn` results.",
+)
+async def search(ctx: Context,
+                 query: str,
+                 topn: int = 10,
+                 source: Optional[str] = None) -> str:
+    """Run a search in the browser associated with `ctx.client_id`.
+
+    `query`, `topn` and `source` are forwarded unchanged to the browser's
+    `search`. Returns the text of every yielded message, joined by newlines.
+    """
+    app_context = ctx.request_context.lifespan_context
+    browser = app_context.create_or_get_browser(ctx.client_id)
+    results = browser.search(query=query, topn=topn, source=source)
+    # Keep only messages whose first content part carries a `text` attribute.
+    texts = [
+        message.content[0].text async for message in results
+        if message.content and hasattr(message.content[0], 'text')
+    ]
+    return "\n".join(texts)
+
+
+@mcp.tool(
+    name="open",
+    title="Open a link or page",
+    description="""
+Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.
+Valid link ids are displayed with the formatting: `【{id}†.*】`.
+If `cursor` is not provided, the most recent page is implied.
+If `id` is a string, it is treated as a fully qualified URL associated with `source`.
+If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.
+Use this function without `id` to scroll to a new location of an opened page.
+""".strip(),
+)
+async def open_link(ctx: Context,
+                    id: Union[int, str] = -1,
+                    cursor: int = -1,
+                    loc: int = -1,
+                    num_lines: int = -1,
+                    view_source: bool = False,
+                    source: Optional[str] = None) -> str:
+    """Open a link, or move within a page, in the browser for `ctx.client_id`.
+
+    Every argument is forwarded unchanged to the browser's `open`. Returns
+    the text of every yielded message, joined by newlines.
+    """
+    app_context = ctx.request_context.lifespan_context
+    browser = app_context.create_or_get_browser(ctx.client_id)
+    pages = browser.open(
+        id=id,
+        cursor=cursor,
+        loc=loc,
+        num_lines=num_lines,
+        view_source=view_source,
+        source=source,
+    )
+    # Keep only messages whose first content part carries a `text` attribute.
+    texts = [
+        message.content[0].text async for message in pages
+        if message.content and hasattr(message.content[0], 'text')
+    ]
+    return "\n".join(texts)
+
+
+@mcp.tool(
+    name="find",
+    title="Find pattern in page",
+    description=
+    "Finds exact matches of `pattern` in the current page, or the page given by `cursor`.",
+)
+async def find_pattern(ctx: Context, pattern: str, cursor: int = -1) -> str:
+    """Look for `pattern` in a page of the browser for `ctx.client_id`.
+
+    `pattern` and `cursor` are forwarded unchanged to the browser's `find`.
+    Returns the text of every yielded message, joined by newlines.
+    """
+    app_context = ctx.request_context.lifespan_context
+    browser = app_context.create_or_get_browser(ctx.client_id)
+    matches = browser.find(pattern=pattern, cursor=cursor)
+    # Keep only messages whose first content part carries a `text` attribute.
+    texts = [
+        message.content[0].text async for message in matches
+        if message.content and hasattr(message.content[0], 'text')
+    ]
+    return "\n".join(texts)
--- a/gpt-oss-mcp-server/build-system-prompt.py
+++ b/gpt-oss-mcp-server/build-system-prompt.py
@@ -0,0 +1,115 @@
+import datetime
+import asyncio
+
+from gpt_oss.tokenizer import tokenizer
+
+from openai_harmony import (
+    Conversation,
+    DeveloperContent,
+    HarmonyEncodingName,
+    Message,
+    ReasoningEffort,
+    Role,
+    SystemContent,
+    ToolNamespaceConfig,
+    ToolDescription,
+    load_harmony_encoding,
+)
+
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+from mcp.types import ListToolsResult
+
+
+async def list_server_and_tools(server_url: str):
+    """Connect to the MCP server at `server_url` over SSE and query it.
+
+    Returns the tuple `(initialize_response, list_tools_response)`: the
+    result of `session.initialize()` and then of `session.list_tools()`.
+    """
+    async with sse_client(url=server_url) as streams:
+        async with ClientSession(*streams) as session:
+            # Initialize first; the tool listing is requested afterwards.
+            init_result = await session.initialize()
+            tools_result = await session.list_tools()
+            return init_result, tools_result
+
+
+def trim_schema(schema: dict) -> dict:
+    """Turn a JSON Schema generated by MCP into Harmony's variant.
+
+    The schema is modified in place and also returned:
+
+    * "title" is removed;
+    * a "default" whose value is None is removed;
+    * "anyOf": [{"type": "a"}, {"type": "b"}] becomes "type": ["a", "b"],
+      leaving out "null" (per the original note, Harmony just ignores it);
+    * every dict entry of "properties" is processed recursively.
+
+    An "anyOf" in which some member has no "type" key (for example a
+    "$ref") is left untouched instead of raising KeyError, and property
+    schemas that are not dicts (JSON Schema allows booleans) are kept as is.
+    """
+    schema.pop("title", None)
+    if "default" in schema and schema["default"] is None:
+        del schema["default"]
+    if "anyOf" in schema:
+        options = schema["anyOf"]
+        # Only flatten when every alternative can be reduced to its type name.
+        if all(isinstance(option, dict) and "type" in option
+               for option in options):
+            schema["type"] = [
+                option["type"] for option in options
+                if option["type"] != 'null'
+            ]
+            del schema["anyOf"]
+    if "properties" in schema:
+        schema["properties"] = {
+            k: trim_schema(v) if isinstance(v, dict) else v
+            for k, v in schema["properties"].items()
+        }
+    return schema
+
+
+def post_process_tools_description(
+        list_tools_result: ListToolsResult) -> ListToolsResult:
+    """Adapt an MCP tool listing for Harmony, in place, and return it.
+
+    Every tool's `inputSchema` is passed through `trim_schema`. Tools whose
+    annotations set `include_in_prompt` to a falsy value are then dropped
+    from the listing; a missing flag (or no annotations) counts as included.
+    """
+    kept_tools = []
+    for tool in list_tools_result.tools:
+        tool.inputSchema = trim_schema(tool.inputSchema)
+        # Some tool schemas don't need to be part of the prompt
+        # (e.g. simple text in, text out for Python).
+        if getattr(tool.annotations, "include_in_prompt", True):
+            kept_tools.append(tool)
+    list_tools_result.tools = kept_tools
+    return list_tools_result
+
+
+# SSE endpoints of the MCP servers to query, in the order their tools are
+# added to the system prompt.
+tools_urls = [
+    "http://localhost:8001/sse",  # browser
+    "http://localhost:8000/sse",  # python
+]
+
+# Build one Harmony tool namespace per server from what the server reports:
+# its name, its instructions, and its (post-processed) tool list.
+harmony_tool_descriptions = []
+for tools_url in tools_urls:
+    initialize_response, list_tools_response = asyncio.run(
+        list_server_and_tools(tools_url))
+    list_tools_response = post_process_tools_description(list_tools_response)
+
+    namespace_tools = []
+    for tool in list_tools_response.tools:
+        namespace_tools.append(
+            ToolDescription.new(name=tool.name,
+                                description=tool.description,
+                                parameters=tool.inputSchema))
+
+    harmony_tool_descriptions.append(
+        ToolNamespaceConfig(
+            name=initialize_response.serverInfo.name,
+            description=initialize_response.instructions,
+            tools=namespace_tools,
+        ))
+
+encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+
+# System message: low reasoning effort, today's date, then each namespace.
+start_date = datetime.datetime.now().strftime("%Y-%m-%d")
+system_message_content = SystemContent.new()
+system_message_content = system_message_content.with_reasoning_effort(
+    ReasoningEffort.LOW)
+system_message_content = system_message_content.with_conversation_start_date(
+    start_date)
+for namespace in harmony_tool_descriptions:
+    system_message_content = system_message_content.with_tools(namespace)
+
+system_message = Message.from_role_and_content(Role.SYSTEM,
+                                               system_message_content)
+
+# Developer message with empty instructions.
+developer_message = Message.from_role_and_content(
+    Role.DEVELOPER,
+    DeveloperContent.new().with_instructions(""),
+)
+
+# Render the two messages to tokens, decode them back to text, and print it.
+conversation = Conversation.from_messages([system_message, developer_message])
+tokens = encoding.render_conversation(conversation)
+rendered_prompt = tokenizer.decode(tokens)
+print(rendered_prompt)
--- a/gpt-oss-mcp-server/pyproject.toml
+++ b/gpt-oss-mcp-server/pyproject.toml
@@ -0,0 +1,8 @@
+[project]
+name = "gpt-oss-mcp-server"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = [
+    "mcp[cli]>=1.12.2",
+    # "gpt_oss"
+]
--- a/gpt-oss-mcp-server/python_server.py
+++ b/gpt-oss-mcp-server/python_server.py
@@ -0,0 +1,33 @@
+from mcp.server.fastmcp import FastMCP
+from gpt_oss.tools.python_docker.docker_tool import PythonTool
+from openai_harmony import Message, TextContent, Author, Role
+
+# Instructions text for the python server. `.strip()` removes the newline
+# that follows the opening quotes and the one that precedes the closing quotes.
+_PYTHON_INSTRUCTIONS = r"""
+Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).
+When you send a message containing python code to python, it will be executed in a stateless docker container, and the stdout of that process will be returned to you.
+""".strip()
+
+# The "python" server. No lifespan and no port are passed here.
+mcp = FastMCP(
+    name="python",
+    instructions=_PYTHON_INSTRUCTIONS,
+)
+
+
+@mcp.tool(
+    name="python",
+    title="Execute Python code",
+    description="""
+Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).
+When you send a message containing python code to python, it will be executed in a stateless docker container, and the stdout of that process will be returned to you.
+    """,
+    annotations={
+        # The Harmony format doesn't want this tool's schema in the prompt,
+        # because the tool is simple text in, text out.
+        "include_in_prompt": False,
+    })
+async def python(code: str) -> str:
+    """Run `code` through a new PythonTool and return its text output.
+
+    The code is wrapped in a tool-role message authored as "python" and
+    handed to `PythonTool.process`. The text of the first content part of
+    each yielded message is collected and joined with newlines. Messages
+    with no content, or whose first part has no `text` attribute, are
+    skipped rather than raising IndexError/AttributeError.
+    """
+    tool = PythonTool()
+    request = Message(author=Author(role=Role.TOOL, name="python"),
+                      content=[TextContent(text=code)])
+    outputs = []
+    async for message in tool.process(request):
+        if message.content and hasattr(message.content[0], 'text'):
+            outputs.append(message.content[0].text)
+    return "\n".join(outputs)
--- a/gpt-oss-mcp-server/reference-system-prompt.py
+++ b/gpt-oss-mcp-server/reference-system-prompt.py
@@ -0,0 +1,46 @@
+import datetime
+
+from gpt_oss.tools.simple_browser import SimpleBrowserTool
+from gpt_oss.tools.simple_browser.backend import ExaBackend
+from gpt_oss.tools.python_docker.docker_tool import PythonTool
+from gpt_oss.tokenizer import tokenizer
+
+from openai_harmony import (
+    Conversation,
+    DeveloperContent,
+    HarmonyEncodingName,
+    Message,
+    ReasoningEffort,
+    Role,
+    SystemContent,
+    load_harmony_encoding,
+)
+
+encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+
+# Instantiate the reference tools directly; their `tool_config` objects are
+# what gets added to the system message.
+browser_tool = SimpleBrowserTool(backend=ExaBackend(source="web"))
+python_tool = PythonTool()
+
+# System message: low reasoning effort, today's date, browser then python.
+start_date = datetime.datetime.now().strftime("%Y-%m-%d")
+system_message_content = SystemContent.new()
+system_message_content = system_message_content.with_reasoning_effort(
+    ReasoningEffort.LOW)
+system_message_content = system_message_content.with_conversation_start_date(
+    start_date)
+for tool_config in (browser_tool.tool_config, python_tool.tool_config):
+    system_message_content = system_message_content.with_tools(tool_config)
+
+system_message = Message.from_role_and_content(Role.SYSTEM,
+                                               system_message_content)
+
+# Developer message with empty instructions.
+developer_message = Message.from_role_and_content(
+    Role.DEVELOPER,
+    DeveloperContent.new().with_instructions(""),
+)
+
+# Render the two messages to tokens, decode them back to text, and print it.
+conversation = Conversation.from_messages([system_message, developer_message])
+tokens = encoding.render_conversation(conversation)
+rendered_prompt = tokenizer.decode(tokens)
+print(rendered_prompt)