Fix: handle client-side meta refresh redirects (#40)

Fixes the "Redirecting..." response issue by adding support for HTML
meta refresh redirects in `mcpdoc/main.py`.

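- Parses `<meta http-equiv="refresh" content="N; url=...">` tags in the
fetched HTML and follows the client-side redirect by resolving the target
against the response URL and fetching it once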
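- Only follows meta refresh redirects when `follow_redirects` is enabled,
consistent with existing `--follow-redirects` flag behavior; the redirect
target must still match the allowed domains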
- Resolves cases where documentation sites use meta refresh instead of
HTTP redirects

Modified: `mcpdoc/main.py`
This commit is contained in:
Aliyan Ishfaq
2025-07-22 13:22:39 -07:00
committed by GitHub
parent d1db6319b9
commit a429dc788b

View File

@@ -1,7 +1,8 @@
"""MCP Llms-txt server for docs."""
import os
from urllib.parse import urlparse
import re
from urllib.parse import urlparse, urljoin
import httpx
from markdownify import markdownify
@@ -228,7 +229,8 @@ def create_server(
@server.tool(description=fetch_docs_description)
async def fetch_docs(url: str) -> str:
nonlocal domains
nonlocal domains, follow_redirects
url = url.strip()
# Handle local file paths (either as file:// URLs or direct filesystem paths)
if not _is_http_or_https(url):
abs_path = _normalize_path(url)
@@ -255,7 +257,33 @@ def create_server(
try:
response = await httpx_client.get(url, timeout=timeout)
response.raise_for_status()
return markdownify(response.text)
content = response.text
if follow_redirects:
# Check for meta refresh tag which indicates a client-side redirect
match = re.search(
r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
content,
re.IGNORECASE,
)
if match:
redirect_url = match.group(1)
new_url = urljoin(str(response.url), redirect_url)
if "*" not in domains and not any(
new_url.startswith(domain) for domain in domains
):
return (
"Error: Redirect URL not allowed. Must start with one of the following domains: "
+ ", ".join(domains)
)
response = await httpx_client.get(new_url, timeout=timeout)
response.raise_for_status()
content = response.text
return markdownify(content)
except (httpx.HTTPStatusError, httpx.RequestError) as e:
return f"Encountered an HTTP error: {str(e)}"