Fix: handle client-side meta refresh redirects (#40)

Fixes the issue where `fetch_docs` returned only a "Redirecting..." stub page,
by adding support for HTML meta refresh redirects in `mcpdoc/main.py`.

- Parses `<meta http-equiv="refresh">` tags to follow client-side
redirects
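- Follows them only when redirects are enabled, consistent with existing
`--follow-redirects` flag behavior, and checks the redirect target against
the allowed domains before fetching it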
- Resolves cases where documentation sites use meta refresh instead of
HTTP redirects

Modified: `mcpdoc/main.py`
This commit is contained in:
Aliyan Ishfaq
2025-07-22 13:22:39 -07:00
committed by GitHub
parent d1db6319b9
commit a429dc788b

View File

@@ -1,7 +1,8 @@
"""MCP Llms-txt server for docs.""" """MCP Llms-txt server for docs."""
import os import os
from urllib.parse import urlparse import re
from urllib.parse import urlparse, urljoin
import httpx import httpx
from markdownify import markdownify from markdownify import markdownify
@@ -228,7 +229,8 @@ def create_server(
@server.tool(description=fetch_docs_description) @server.tool(description=fetch_docs_description)
async def fetch_docs(url: str) -> str: async def fetch_docs(url: str) -> str:
nonlocal domains nonlocal domains, follow_redirects
url = url.strip()
# Handle local file paths (either as file:// URLs or direct filesystem paths) # Handle local file paths (either as file:// URLs or direct filesystem paths)
if not _is_http_or_https(url): if not _is_http_or_https(url):
abs_path = _normalize_path(url) abs_path = _normalize_path(url)
@@ -255,7 +257,33 @@ def create_server(
try: try:
response = await httpx_client.get(url, timeout=timeout) response = await httpx_client.get(url, timeout=timeout)
response.raise_for_status() response.raise_for_status()
return markdownify(response.text) content = response.text
if follow_redirects:
# Check for meta refresh tag which indicates a client-side redirect
match = re.search(
r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
content,
re.IGNORECASE,
)
if match:
redirect_url = match.group(1)
new_url = urljoin(str(response.url), redirect_url)
if "*" not in domains and not any(
new_url.startswith(domain) for domain in domains
):
return (
"Error: Redirect URL not allowed. Must start with one of the following domains: "
+ ", ".join(domains)
)
response = await httpx_client.get(new_url, timeout=timeout)
response.raise_for_status()
content = response.text
return markdownify(content)
except (httpx.HTTPStatusError, httpx.RequestError) as e: except (httpx.HTTPStatusError, httpx.RequestError) as e:
return f"Encountered an HTTP error: {str(e)}" return f"Encountered an HTTP error: {str(e)}"