mirror of
https://github.com/blazickjp/arxiv-mcp-server.git
synced 2025-07-25 20:38:49 +03:00
removing unlink code
This commit is contained in:
@@ -22,6 +22,7 @@ conversion_statuses: Dict[str, Any] = {}
|
||||
@dataclass
|
||||
class ConversionStatus:
|
||||
"""Track the status of a PDF to Markdown conversion."""
|
||||
|
||||
paper_id: str
|
||||
status: str # 'downloading', 'converting', 'success', 'error'
|
||||
started_at: datetime
|
||||
@@ -37,16 +38,16 @@ download_tool = types.Tool(
|
||||
"properties": {
|
||||
"paper_id": {
|
||||
"type": "string",
|
||||
"description": "The arXiv ID of the paper to download"
|
||||
"description": "The arXiv ID of the paper to download",
|
||||
},
|
||||
"check_status": {
|
||||
"type": "boolean",
|
||||
"description": "If true, only check conversion status without downloading",
|
||||
"default": False
|
||||
}
|
||||
"default": False,
|
||||
},
|
||||
},
|
||||
"required": ["paper_id"]
|
||||
}
|
||||
"required": ["paper_id"],
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -63,7 +64,7 @@ def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
|
||||
logger.info(f"Starting conversion for {paper_id}")
|
||||
markdown = pymupdf4llm.to_markdown(pdf_path, show_progress=False)
|
||||
md_path = get_paper_path(paper_id, ".md")
|
||||
|
||||
|
||||
with open(md_path, "w", encoding="utf-8") as f:
|
||||
f.write(markdown)
|
||||
|
||||
@@ -71,11 +72,10 @@ def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
|
||||
if status:
|
||||
status.status = "success"
|
||||
status.completed_at = datetime.now()
|
||||
|
||||
|
||||
# Clean up PDF after successful conversion
|
||||
pdf_path.unlink()
|
||||
logger.info(f"Conversion completed for {paper_id}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Conversion failed for {paper_id}: {str(e)}")
|
||||
status = conversion_statuses.get(paper_id)
|
||||
@@ -90,108 +90,137 @@ async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
|
||||
try:
|
||||
paper_id = arguments["paper_id"]
|
||||
check_status = arguments.get("check_status", False)
|
||||
|
||||
|
||||
# If only checking status
|
||||
if check_status:
|
||||
status = conversion_statuses.get(paper_id)
|
||||
if not status:
|
||||
if get_paper_path(paper_id, ".md").exists():
|
||||
return [types.TextContent(
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": "success",
|
||||
"message": "Paper is ready",
|
||||
"resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "success",
|
||||
"message": "Paper is ready",
|
||||
"resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
|
||||
})
|
||||
)]
|
||||
return [types.TextContent(
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": "unknown",
|
||||
"message": "No download or conversion in progress",
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "unknown",
|
||||
"message": "No download or conversion in progress"
|
||||
})
|
||||
)]
|
||||
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": status.status,
|
||||
"started_at": status.started_at.isoformat(),
|
||||
"completed_at": status.completed_at.isoformat() if status.completed_at else None,
|
||||
"error": status.error,
|
||||
"message": f"Paper conversion {status.status}"
|
||||
})
|
||||
)]
|
||||
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": status.status,
|
||||
"started_at": status.started_at.isoformat(),
|
||||
"completed_at": (
|
||||
status.completed_at.isoformat()
|
||||
if status.completed_at
|
||||
else None
|
||||
),
|
||||
"error": status.error,
|
||||
"message": f"Paper conversion {status.status}",
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Check if paper is already converted
|
||||
if get_paper_path(paper_id, ".md").exists():
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "success",
|
||||
"message": "Paper already available",
|
||||
"resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
|
||||
})
|
||||
)]
|
||||
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": "success",
|
||||
"message": "Paper already available",
|
||||
"resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Check if already in progress
|
||||
if paper_id in conversion_statuses:
|
||||
status = conversion_statuses[paper_id]
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": status.status,
|
||||
"message": f"Paper conversion {status.status}",
|
||||
"started_at": status.started_at.isoformat()
|
||||
})
|
||||
)]
|
||||
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": status.status,
|
||||
"message": f"Paper conversion {status.status}",
|
||||
"started_at": status.started_at.isoformat(),
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Start new download and conversion
|
||||
pdf_path = get_paper_path(paper_id, ".pdf")
|
||||
client = arxiv.Client()
|
||||
|
||||
|
||||
# Initialize status
|
||||
conversion_statuses[paper_id] = ConversionStatus(
|
||||
paper_id=paper_id,
|
||||
status="downloading",
|
||||
started_at=datetime.now()
|
||||
paper_id=paper_id, status="downloading", started_at=datetime.now()
|
||||
)
|
||||
|
||||
|
||||
# Download PDF
|
||||
paper = next(client.results(arxiv.Search(id_list=[paper_id])))
|
||||
paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
|
||||
|
||||
|
||||
# Update status and start conversion
|
||||
status = conversion_statuses[paper_id]
|
||||
status.status = "converting"
|
||||
|
||||
|
||||
# Start conversion in thread
|
||||
asyncio.create_task(
|
||||
asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
|
||||
)
|
||||
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "converting",
|
||||
"message": "Paper downloaded, conversion started",
|
||||
"started_at": status.started_at.isoformat()
|
||||
})
|
||||
)]
|
||||
|
||||
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": "converting",
|
||||
"message": "Paper downloaded, conversion started",
|
||||
"started_at": status.started_at.isoformat(),
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
except StopIteration:
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "error",
|
||||
"message": f"Paper {paper_id} not found on arXiv"
|
||||
})
|
||||
)]
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"status": "error",
|
||||
"message": f"Paper {paper_id} not found on arXiv",
|
||||
}
|
||||
),
|
||||
)
|
||||
]
|
||||
except Exception as e:
|
||||
return [types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({
|
||||
"status": "error",
|
||||
"message": f"Error: {str(e)}"
|
||||
})
|
||||
)]
|
||||
return [
|
||||
types.TextContent(
|
||||
type="text",
|
||||
text=json.dumps({"status": "error", "message": f"Error: {str(e)}"}),
|
||||
)
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user