Remove PDF unlink after successful conversion; reformat download.py

2025-07-25 20:38:49 +03:00 · 2025-04-10 19:43:33 -07:00
parent 8f63ef2554
commit d99ff0f0dd
1 changed files with 113 additions and 84 deletions
--- a/src/arxiv_mcp_server/tools/download.py
+++ b/src/arxiv_mcp_server/tools/download.py
@@ -22,6 +22,7 @@ conversion_statuses: Dict[str, Any] = {}
@dataclass
 class ConversionStatus:
    """Track the status of a PDF to Markdown conversion."""
+
    paper_id: str
    status: str  # 'downloading', 'converting', 'success', 'error'
    started_at: datetime
@@ -37,16 +38,16 @@ download_tool = types.Tool(
        "properties": {
            "paper_id": {
                "type": "string",
-                "description": "The arXiv ID of the paper to download"
+                "description": "The arXiv ID of the paper to download",
            },
            "check_status": {
                "type": "boolean",
                "description": "If true, only check conversion status without downloading",
-                "default": False
-            }
+                "default": False,
+            },
        },
-        "required": ["paper_id"]
-    }
+        "required": ["paper_id"],
+    },
 )


@@ -63,7 +64,7 @@ def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
        logger.info(f"Starting conversion for {paper_id}")
        markdown = pymupdf4llm.to_markdown(pdf_path, show_progress=False)
        md_path = get_paper_path(paper_id, ".md")
-        
+
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(markdown)

@@ -71,11 +72,10 @@ def convert_pdf_to_markdown(paper_id: str, pdf_path: Path) -> None:
        if status:
            status.status = "success"
            status.completed_at = datetime.now()
-            
+
        # Clean up PDF after successful conversion
-        pdf_path.unlink()
        logger.info(f"Conversion completed for {paper_id}")
-        
+
    except Exception as e:
        logger.error(f"Conversion failed for {paper_id}: {str(e)}")
        status = conversion_statuses.get(paper_id)
@@ -90,108 +90,137 @@ async def handle_download(arguments: Dict[str, Any]) -> List[types.TextContent]:
    try:
        paper_id = arguments["paper_id"]
        check_status = arguments.get("check_status", False)
-        
+
        # If only checking status
        if check_status:
            status = conversion_statuses.get(paper_id)
            if not status:
                if get_paper_path(paper_id, ".md").exists():
-                    return [types.TextContent(
+                    return [
+                        types.TextContent(
+                            type="text",
+                            text=json.dumps(
+                                {
+                                    "status": "success",
+                                    "message": "Paper is ready",
+                                    "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
+                                }
+                            ),
+                        )
+                    ]
+                return [
+                    types.TextContent(
                        type="text",
-                        text=json.dumps({
-                            "status": "success",
-                            "message": "Paper is ready",
-                            "resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
-                        })
-                    )]
-                return [types.TextContent(
+                        text=json.dumps(
+                            {
+                                "status": "unknown",
+                                "message": "No download or conversion in progress",
+                            }
+                        ),
+                    )
+                ]
+
+            return [
+                types.TextContent(
                    type="text",
-                    text=json.dumps({
-                        "status": "unknown",
-                        "message": "No download or conversion in progress"
-                    })
-                )]
-            
-            return [types.TextContent(
-                type="text",
-                text=json.dumps({
-                    "status": status.status,
-                    "started_at": status.started_at.isoformat(),
-                    "completed_at": status.completed_at.isoformat() if status.completed_at else None,
-                    "error": status.error,
-                    "message": f"Paper conversion {status.status}"
-                })
-            )]
-        
+                    text=json.dumps(
+                        {
+                            "status": status.status,
+                            "started_at": status.started_at.isoformat(),
+                            "completed_at": (
+                                status.completed_at.isoformat()
+                                if status.completed_at
+                                else None
+                            ),
+                            "error": status.error,
+                            "message": f"Paper conversion {status.status}",
+                        }
+                    ),
+                )
+            ]
+
        # Check if paper is already converted
        if get_paper_path(paper_id, ".md").exists():
-            return [types.TextContent(
-                type="text",
-                text=json.dumps({
-                    "status": "success",
-                    "message": "Paper already available",
-                    "resource_uri": f"file://{get_paper_path(paper_id, '.md')}"
-                })
-            )]
-        
+            return [
+                types.TextContent(
+                    type="text",
+                    text=json.dumps(
+                        {
+                            "status": "success",
+                            "message": "Paper already available",
+                            "resource_uri": f"file://{get_paper_path(paper_id, '.md')}",
+                        }
+                    ),
+                )
+            ]
+
        # Check if already in progress
        if paper_id in conversion_statuses:
            status = conversion_statuses[paper_id]
-            return [types.TextContent(
-                type="text",
-                text=json.dumps({
-                    "status": status.status,
-                    "message": f"Paper conversion {status.status}",
-                    "started_at": status.started_at.isoformat()
-                })
-            )]
-        
+            return [
+                types.TextContent(
+                    type="text",
+                    text=json.dumps(
+                        {
+                            "status": status.status,
+                            "message": f"Paper conversion {status.status}",
+                            "started_at": status.started_at.isoformat(),
+                        }
+                    ),
+                )
+            ]
+
        # Start new download and conversion
        pdf_path = get_paper_path(paper_id, ".pdf")
        client = arxiv.Client()
-        
+
        # Initialize status
        conversion_statuses[paper_id] = ConversionStatus(
-            paper_id=paper_id,
-            status="downloading", 
-            started_at=datetime.now()
+            paper_id=paper_id, status="downloading", started_at=datetime.now()
        )
-        
+
        # Download PDF
        paper = next(client.results(arxiv.Search(id_list=[paper_id])))
        paper.download_pdf(dirpath=pdf_path.parent, filename=pdf_path.name)
-        
+
        # Update status and start conversion
        status = conversion_statuses[paper_id]
        status.status = "converting"
-        
+
        # Start conversion in thread
        asyncio.create_task(
            asyncio.to_thread(convert_pdf_to_markdown, paper_id, pdf_path)
        )
-        
-        return [types.TextContent(
-            type="text",
-            text=json.dumps({
-                "status": "converting",
-                "message": "Paper downloaded, conversion started",
-                "started_at": status.started_at.isoformat()
-            })
-        )]
-        
+
+        return [
+            types.TextContent(
+                type="text",
+                text=json.dumps(
+                    {
+                        "status": "converting",
+                        "message": "Paper downloaded, conversion started",
+                        "started_at": status.started_at.isoformat(),
+                    }
+                ),
+            )
+        ]
+
    except StopIteration:
-        return [types.TextContent(
-            type="text",
-            text=json.dumps({
-                "status": "error",
-                "message": f"Paper {paper_id} not found on arXiv"
-            })
-        )]
+        return [
+            types.TextContent(
+                type="text",
+                text=json.dumps(
+                    {
+                        "status": "error",
+                        "message": f"Paper {paper_id} not found on arXiv",
+                    }
+                ),
+            )
+        ]
    except Exception as e:
-        return [types.TextContent(
-            type="text",
-            text=json.dumps({
-                "status": "error",
-                "message": f"Error: {str(e)}"
-            })
-        )]
+        return [
+            types.TextContent(
+                type="text",
+                text=json.dumps({"status": "error", "message": f"Error: {str(e)}"}),
+            )
+        ]