mirror of
https://github.com/langchain-ai/mcpdoc.git
synced 2025-10-19 03:18:14 +03:00
Add local file reading
This commit is contained in:
@@ -20,6 +20,9 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
#### Choose an `llms.txt` file to use.
|
||||
* For example, [here's](https://langchain-ai.github.io/langgraph/llms.txt) the LangGraph `llms.txt` file.
|
||||
|
||||
The allowed-domains list is populated from the hosts of your llms.txt URLs. A local file
|
||||
contributes no domain, so when only local files are used, all domains are allowed by default.
|
||||
|
||||
#### (Optional) Test the MCP server locally with your `llms.txt` file of choice:
|
||||
```bash
|
||||
uvx --from mcpdoc mcpdoc \
|
||||
|
||||
@@ -25,6 +25,10 @@ Examples:
|
||||
# Directly specifying llms.txt URLs with optional names
|
||||
mcpdoc --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt
|
||||
|
||||
# Using a local file (absolute or relative path)
|
||||
mcpdoc --urls LocalDocs:/path/to/llms.txt
|
||||
mcpdoc --urls LocalDocs:file:///path/to/llms.txt
|
||||
|
||||
# Using a YAML config file
|
||||
mcpdoc --yaml sample_config.yaml
|
||||
|
||||
@@ -72,7 +76,7 @@ def parse_args() -> argparse.Namespace:
|
||||
"-u",
|
||||
type=str,
|
||||
nargs="+",
|
||||
help="List of llms.txt URLs with optional names (format: 'url' or 'name:url')",
|
||||
help="List of llms.txt URLs or file paths with optional names (format: 'url_or_path' or 'name:url_or_path')",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -84,7 +88,7 @@ def parse_args() -> argparse.Namespace:
|
||||
"--allowed-domains",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help="Additional allowed domains to fetch documentation from. Use '*' to allow all domains",
|
||||
help="Additional allowed domains to fetch documentation from. Use '*' to allow all domains. When using only local files, all domains are allowed by default.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timeout", type=float, default=10.0, help="HTTP request timeout in seconds"
|
||||
@@ -163,10 +167,11 @@ def load_config_file(file_path: str, file_format: str) -> List[Dict[str, str]]:
|
||||
|
||||
|
||||
def create_doc_sources_from_urls(urls: List[str]) -> List[DocSource]:
|
||||
"""Create doc sources from a list of URLs with optional names.
|
||||
"""Create doc sources from a list of URLs or file paths with optional names.
|
||||
|
||||
Args:
|
||||
urls: List of llms.txt URLs with optional names (format: 'url' or 'name:url')
|
||||
urls: List of llms.txt URLs or file paths with optional names
|
||||
(format: 'url_or_path' or 'name:url_or_path')
|
||||
|
||||
Returns:
|
||||
List of DocSource objects
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import os
|
||||
import httpx
|
||||
from markdownify import markdownify
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
@@ -53,6 +54,7 @@ def create_server(
|
||||
Use ['*'] to allow all domains
|
||||
The domain hosting the llms.txt file is always appended to the list
|
||||
of allowed domains.
|
||||
When using only local files, all domains ('*') are allowed by default.
|
||||
|
||||
Returns:
|
||||
A FastMCP server instance configured with documentation tools
|
||||
@@ -73,31 +75,52 @@ def create_server(
|
||||
"""List all available documentation sources.
|
||||
|
||||
This is the first tool you should call in the documentation workflow.
|
||||
It provides URLs to llms.txt files that the user has made available.
|
||||
It provides URLs to llms.txt files or local file paths that the user has made available.
|
||||
|
||||
Returns:
|
||||
A string containing a formatted list of documentation sources with their URLs
|
||||
A string containing a formatted list of documentation sources with their URLs or file paths
|
||||
"""
|
||||
content = ""
|
||||
for entry in doc_source:
|
||||
name = entry.get("name", "") or extract_domain(entry["llms_txt"])
|
||||
url_or_path = entry["llms_txt"]
|
||||
# For local paths or file:// URLs, use a different label
|
||||
is_local = url_or_path.startswith("file://") or os.path.exists(url_or_path)
|
||||
|
||||
if is_local:
|
||||
name = entry.get("name", "") or os.path.basename(url_or_path.replace("file://", ""))
|
||||
content += f"{name}\n"
|
||||
content += "URL: " + entry["llms_txt"] + "\n\n"
|
||||
content += "Path: " + url_or_path + "\n\n"
|
||||
else:
|
||||
name = entry.get("name", "") or extract_domain(url_or_path)
|
||||
content += f"{name}\n"
|
||||
content += "URL: " + url_or_path + "\n\n"
|
||||
return content
|
||||
|
||||
# Parse the domain names in the llms.txt URLs
|
||||
domains = set(extract_domain(entry["llms_txt"]) for entry in doc_source)
|
||||
# Parse the domain names in the llms.txt URLs and identify local file paths
|
||||
domains = set()
|
||||
has_local_files = False
|
||||
|
||||
# Add additional allowed domains if specified
|
||||
for entry in doc_source:
|
||||
url = entry["llms_txt"]
|
||||
if url.startswith("file://") or os.path.exists(url):
|
||||
# Local file - mark that we have local files
|
||||
has_local_files = True
|
||||
continue
|
||||
domains.add(extract_domain(url))
|
||||
|
||||
# Add additional allowed domains if specified, or set to '*' if we have local files
|
||||
if allowed_domains:
|
||||
if "*" in allowed_domains:
|
||||
domains = {"*"} # Special marker for allowing all domains
|
||||
else:
|
||||
domains.update(allowed_domains)
|
||||
elif has_local_files and not domains:
|
||||
# If we have local files and no domains added yet, allow all domains by default
|
||||
domains = {"*"}
|
||||
|
||||
@server.tool()
|
||||
async def fetch_docs(url: str) -> str:
|
||||
"""Fetch and parse documentation from a given URL.
|
||||
"""Fetch and parse documentation from a given URL or local file.
|
||||
|
||||
Use this tool after list_doc_sources to:
|
||||
1. First fetch the llms.txt file from a documentation source
|
||||
@@ -105,13 +128,37 @@ def create_server(
|
||||
3. Then fetch specific documentation pages relevant to the user's question
|
||||
|
||||
Args:
|
||||
url: The URL to fetch documentation from. Must be from an allowed domain.
|
||||
url: The URL or file path to fetch documentation from. Can be:
|
||||
- URL from an allowed domain
|
||||
- A local file path (absolute or relative)
|
||||
- A file:// URL (e.g., file:///path/to/llms.txt)
|
||||
|
||||
Returns:
|
||||
The fetched documentation content converted to markdown, or an error message
|
||||
if the request fails or the URL is not from an allowed domain.
|
||||
"""
|
||||
nonlocal domains
|
||||
|
||||
# Handle local file paths (either as file:// URLs or direct filesystem paths)
|
||||
if url.startswith("file://"):
|
||||
file_path = url[7:] # Remove the file:// prefix
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
return markdownify(content)
|
||||
except Exception as e:
|
||||
return f"Error reading local file: {str(e)}"
|
||||
|
||||
# Check if it's a direct filesystem path that exists
|
||||
if os.path.exists(url):
|
||||
try:
|
||||
with open(url, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
return markdownify(content)
|
||||
except Exception as e:
|
||||
return f"Error reading local file: {str(e)}"
|
||||
|
||||
# Otherwise treat as URL
|
||||
if "*" not in domains and not any(url.startswith(domain) for domain in domains):
|
||||
return (
|
||||
"Error: URL not allowed. Must start with one of the following domains: "
|
||||
@@ -123,6 +170,6 @@ def create_server(
|
||||
response.raise_for_status()
|
||||
return markdownify(response.text)
|
||||
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
||||
return f"Encountered an HTTP error with code {e.response.status_code}"
|
||||
return f"Encountered an HTTP error: {str(e)}"
|
||||
|
||||
return server
|
||||
|
||||
Reference in New Issue
Block a user