mirror of
https://github.com/langchain-ai/mcpdoc.git
synced 2025-10-19 03:18:14 +03:00
Add local file reading
This commit is contained in:
@@ -20,6 +20,9 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
|||||||
#### Choose an `llms.txt` file to use.
|
#### Choose an `llms.txt` file to use.
|
||||||
* For example, [here's](https://langchain-ai.github.io/langgraph/llms.txt) the LangGraph `llms.txt` file.
|
* For example, [here's](https://langchain-ai.github.io/langgraph/llms.txt) the LangGraph `llms.txt` file.
|
||||||
|
|
||||||
|
The domains list gets populated with the sources of your llms.txt files, but since you're
|
||||||
|
using a local file, no domains are automatically added to the allowed list.
|
||||||
|
|
||||||
#### (Optional) Test the MCP server locally with your `llms.txt` file of choice:
|
#### (Optional) Test the MCP server locally with your `llms.txt` file of choice:
|
||||||
```bash
|
```bash
|
||||||
uvx --from mcpdoc mcpdoc \
|
uvx --from mcpdoc mcpdoc \
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ Examples:
|
|||||||
# Directly specifying llms.txt URLs with optional names
|
# Directly specifying llms.txt URLs with optional names
|
||||||
mcpdoc --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt
|
mcpdoc --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt
|
||||||
|
|
||||||
|
# Using a local file (absolute or relative path)
|
||||||
|
mcpdoc --urls LocalDocs:/path/to/llms.txt
|
||||||
|
mcpdoc --urls LocalDocs:file:///path/to/llms.txt
|
||||||
|
|
||||||
# Using a YAML config file
|
# Using a YAML config file
|
||||||
mcpdoc --yaml sample_config.yaml
|
mcpdoc --yaml sample_config.yaml
|
||||||
|
|
||||||
@@ -72,7 +76,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
"-u",
|
"-u",
|
||||||
type=str,
|
type=str,
|
||||||
nargs="+",
|
nargs="+",
|
||||||
help="List of llms.txt URLs with optional names (format: 'url' or 'name:url')",
|
help="List of llms.txt URLs or file paths with optional names (format: 'url_or_path' or 'name:url_or_path')",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -84,7 +88,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
"--allowed-domains",
|
"--allowed-domains",
|
||||||
type=str,
|
type=str,
|
||||||
nargs="*",
|
nargs="*",
|
||||||
help="Additional allowed domains to fetch documentation from. Use '*' to allow all domains",
|
help="Additional allowed domains to fetch documentation from. Use '*' to allow all domains. When using only local files, all domains are allowed by default.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--timeout", type=float, default=10.0, help="HTTP request timeout in seconds"
|
"--timeout", type=float, default=10.0, help="HTTP request timeout in seconds"
|
||||||
@@ -163,10 +167,11 @@ def load_config_file(file_path: str, file_format: str) -> List[Dict[str, str]]:
|
|||||||
|
|
||||||
|
|
||||||
def create_doc_sources_from_urls(urls: List[str]) -> List[DocSource]:
|
def create_doc_sources_from_urls(urls: List[str]) -> List[DocSource]:
|
||||||
"""Create doc sources from a list of URLs with optional names.
|
"""Create doc sources from a list of URLs or file paths with optional names.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls: List of llms.txt URLs with optional names (format: 'url' or 'name:url')
|
urls: List of llms.txt URLs or file paths with optional names
|
||||||
|
(format: 'url_or_path' or 'name:url_or_path')
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of DocSource objects
|
List of DocSource objects
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import os
|
||||||
import httpx
|
import httpx
|
||||||
from markdownify import markdownify
|
from markdownify import markdownify
|
||||||
from mcp.server.fastmcp import FastMCP
|
from mcp.server.fastmcp import FastMCP
|
||||||
@@ -53,6 +54,7 @@ def create_server(
|
|||||||
Use ['*'] to allow all domains
|
Use ['*'] to allow all domains
|
||||||
The domain hosting the llms.txt file is always appended to the list
|
The domain hosting the llms.txt file is always appended to the list
|
||||||
of allowed domains.
|
of allowed domains.
|
||||||
|
When using only local files, all domains ('*') are allowed by default.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A FastMCP server instance configured with documentation tools
|
A FastMCP server instance configured with documentation tools
|
||||||
@@ -73,31 +75,52 @@ def create_server(
|
|||||||
"""List all available documentation sources.
|
"""List all available documentation sources.
|
||||||
|
|
||||||
This is the first tool you should call in the documentation workflow.
|
This is the first tool you should call in the documentation workflow.
|
||||||
It provides URLs to llms.txt files that the user has made available.
|
It provides URLs to llms.txt files or local file paths that the user has made available.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A string containing a formatted list of documentation sources with their URLs
|
A string containing a formatted list of documentation sources with their URLs or file paths
|
||||||
"""
|
"""
|
||||||
content = ""
|
content = ""
|
||||||
for entry in doc_source:
|
for entry in doc_source:
|
||||||
name = entry.get("name", "") or extract_domain(entry["llms_txt"])
|
url_or_path = entry["llms_txt"]
|
||||||
content += f"{name}\n"
|
# For local paths or file:// URLs, use a different label
|
||||||
content += "URL: " + entry["llms_txt"] + "\n\n"
|
is_local = url_or_path.startswith("file://") or os.path.exists(url_or_path)
|
||||||
|
|
||||||
|
if is_local:
|
||||||
|
name = entry.get("name", "") or os.path.basename(url_or_path.replace("file://", ""))
|
||||||
|
content += f"{name}\n"
|
||||||
|
content += "Path: " + url_or_path + "\n\n"
|
||||||
|
else:
|
||||||
|
name = entry.get("name", "") or extract_domain(url_or_path)
|
||||||
|
content += f"{name}\n"
|
||||||
|
content += "URL: " + url_or_path + "\n\n"
|
||||||
return content
|
return content
|
||||||
|
|
||||||
# Parse the domain names in the llms.txt URLs
|
# Parse the domain names in the llms.txt URLs and identify local file paths
|
||||||
domains = set(extract_domain(entry["llms_txt"]) for entry in doc_source)
|
domains = set()
|
||||||
|
has_local_files = False
|
||||||
|
|
||||||
|
for entry in doc_source:
|
||||||
|
url = entry["llms_txt"]
|
||||||
|
if url.startswith("file://") or os.path.exists(url):
|
||||||
|
# Local file - mark that we have local files
|
||||||
|
has_local_files = True
|
||||||
|
continue
|
||||||
|
domains.add(extract_domain(url))
|
||||||
|
|
||||||
# Add additional allowed domains if specified
|
# Add additional allowed domains if specified; otherwise fall back to '*' when only local files were given (no remote domains collected)
|
||||||
if allowed_domains:
|
if allowed_domains:
|
||||||
if "*" in allowed_domains:
|
if "*" in allowed_domains:
|
||||||
domains = {"*"} # Special marker for allowing all domains
|
domains = {"*"} # Special marker for allowing all domains
|
||||||
else:
|
else:
|
||||||
domains.update(allowed_domains)
|
domains.update(allowed_domains)
|
||||||
|
elif has_local_files and not domains:
|
||||||
|
# If we have local files and no domains added yet, allow all domains by default
|
||||||
|
domains = {"*"}
|
||||||
|
|
||||||
@server.tool()
|
@server.tool()
|
||||||
async def fetch_docs(url: str) -> str:
|
async def fetch_docs(url: str) -> str:
|
||||||
"""Fetch and parse documentation from a given URL.
|
"""Fetch and parse documentation from a given URL or local file.
|
||||||
|
|
||||||
Use this tool after list_doc_sources to:
|
Use this tool after list_doc_sources to:
|
||||||
1. First fetch the llms.txt file from a documentation source
|
1. First fetch the llms.txt file from a documentation source
|
||||||
@@ -105,13 +128,37 @@ def create_server(
|
|||||||
3. Then fetch specific documentation pages relevant to the user's question
|
3. Then fetch specific documentation pages relevant to the user's question
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url: The URL to fetch documentation from. Must be from an allowed domain.
|
url: The URL or file path to fetch documentation from. Can be:
|
||||||
|
- URL from an allowed domain
|
||||||
|
- A local file path (absolute or relative)
|
||||||
|
- A file:// URL (e.g., file:///path/to/llms.txt)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The fetched documentation content converted to markdown, or an error message
|
The fetched documentation content converted to markdown, or an error message
|
||||||
if the request fails or the URL is not from an allowed domain.
|
if the request fails or the URL is not from an allowed domain.
|
||||||
"""
|
"""
|
||||||
nonlocal domains
|
nonlocal domains
|
||||||
|
|
||||||
|
# Handle local file paths (either as file:// URLs or direct filesystem paths)
|
||||||
|
if url.startswith("file://"):
|
||||||
|
file_path = url[7:] # Remove the file:// prefix
|
||||||
|
try:
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
return markdownify(content)
|
||||||
|
except Exception as e:
|
||||||
|
return f"Error reading local file: {str(e)}"
|
||||||
|
|
||||||
|
# Check if it's a direct filesystem path that exists
|
||||||
|
if os.path.exists(url):
|
||||||
|
try:
|
||||||
|
with open(url, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
return markdownify(content)
|
||||||
|
except Exception as e:
|
||||||
|
return f"Error reading local file: {str(e)}"
|
||||||
|
|
||||||
|
# Otherwise treat as URL
|
||||||
if "*" not in domains and not any(url.startswith(domain) for domain in domains):
|
if "*" not in domains and not any(url.startswith(domain) for domain in domains):
|
||||||
return (
|
return (
|
||||||
"Error: URL not allowed. Must start with one of the following domains: "
|
"Error: URL not allowed. Must start with one of the following domains: "
|
||||||
@@ -123,6 +170,6 @@ def create_server(
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return markdownify(response.text)
|
return markdownify(response.text)
|
||||||
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
||||||
return f"Encountered an HTTP error with code {e.response.status_code}"
|
return f"Encountered an HTTP error: {str(e)}"
|
||||||
|
|
||||||
return server
|
return server
|
||||||
|
|||||||
Reference in New Issue
Block a user