mirror of
https://github.com/langchain-ai/mcpdoc.git
synced 2025-10-19 03:18:14 +03:00
Fix: handle client-side meta refresh redirects (#40)
Fixes the "Redirecting..." response issue by adding support for HTML meta refresh redirects in `mcpdoc/main.py`.

- Parses `<meta http-equiv="refresh">` tags to follow client-side redirects
- Consistent with existing `--follow-redirects` flag behavior
- Resolves cases where documentation sites use meta refresh instead of HTTP redirects

Modified: `mcpdoc/main.py`
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
"""MCP Llms-txt server for docs."""
|
||||
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
import re
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
import httpx
|
||||
from markdownify import markdownify
|
||||
@@ -228,7 +229,8 @@ def create_server(
|
||||
|
||||
@server.tool(description=fetch_docs_description)
|
||||
async def fetch_docs(url: str) -> str:
|
||||
nonlocal domains
|
||||
nonlocal domains, follow_redirects
|
||||
url = url.strip()
|
||||
# Handle local file paths (either as file:// URLs or direct filesystem paths)
|
||||
if not _is_http_or_https(url):
|
||||
abs_path = _normalize_path(url)
|
||||
@@ -255,7 +257,33 @@ def create_server(
|
||||
try:
|
||||
response = await httpx_client.get(url, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
return markdownify(response.text)
|
||||
content = response.text
|
||||
|
||||
if follow_redirects:
|
||||
# Check for meta refresh tag which indicates a client-side redirect
|
||||
match = re.search(
|
||||
r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
|
||||
content,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
if match:
|
||||
redirect_url = match.group(1)
|
||||
new_url = urljoin(str(response.url), redirect_url)
|
||||
|
||||
if "*" not in domains and not any(
|
||||
new_url.startswith(domain) for domain in domains
|
||||
):
|
||||
return (
|
||||
"Error: Redirect URL not allowed. Must start with one of the following domains: "
|
||||
+ ", ".join(domains)
|
||||
)
|
||||
|
||||
response = await httpx_client.get(new_url, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
content = response.text
|
||||
|
||||
return markdownify(content)
|
||||
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
||||
return f"Encountered an HTTP error: {str(e)}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user