diff --git a/src/arxiv_mcp_server/tools/search.py b/src/arxiv_mcp_server/tools/search.py
index 92dba24..31c6995 100644
--- a/src/arxiv_mcp_server/tools/search.py
+++ b/src/arxiv_mcp_server/tools/search.py
@@ -58,13 +58,47 @@ def _process_paper(paper: arxiv.Result) -> Dict[str, Any]:
 
 
 async def handle_search(arguments: Dict[str, Any]) -> List[types.TextContent]:
-    """Handle paper search requests."""
+    """Handle paper search requests.
+
+    Plain queries (no token starting with an arXiv field prefix such as
+    ``ti:`` or ``au:``) are rewritten to use the ``all:`` field on every term.
+    This fixes issue #33 where queries sorted by date returned irrelevant results.
+    """
     try:
         client = arxiv.Client()
         max_results = min(int(arguments.get("max_results", 10)), settings.MAX_RESULTS)
 
         # Build search query with category filtering
         query = arguments["query"]
+
+        # Add an "all:" field specifier to plain queries so every term is
+        # searched. A query counts as fielded only when a token *starts*
+        # with a prefix; a substring test misfires on words like "tau:".
+        field_prefixes = (
+            "all:", "ti:", "abs:", "au:", "cat:", "co:", "jr:", "rn:", "id:"
+        )
+        boolean_operators = ("AND", "OR", "ANDNOT")
+        has_field = any(
+            token.lstrip('("').startswith(field_prefixes) for token in query.split()
+        )
+        if not has_field:
+            if '"' in query:
+                # Keep quoted phrases intact
+                query = f"all:{query}"
+            else:
+                # Prefix each term, keep explicit boolean operators, and
+                # AND together adjacent terms with no operator between them
+                parts = []
+                for token in query.split():
+                    if token in boolean_operators:
+                        parts.append(token)
+                        continue
+                    if parts and parts[-1] not in boolean_operators:
+                        parts.append("AND")
+                    parts.append(f"all:{token}")
+                # Empty or whitespace-only input yields no parts
+                query = " ".join(parts) if parts else f"all:{query}"
+
         if categories := arguments.get("categories"):
             category_filter = " OR ".join(f"cat:{cat}" for cat in categories)
             query = f"({query}) AND ({category_filter})"
diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py
index b3eea78..6f9b9b0 100644
--- a/tests/tools/test_search.py
+++ b/tests/tools/test_search.py
@@ -60,3 +60,34 @@ async def test_search_with_invalid_dates(mock_client):
         )
 
         assert result[0].text.startswith("Error: Invalid date format")
+
+
+@pytest.mark.asyncio
+async def test_search_query_field_specifier_fix(mock_client):
+    """Test that plain queries get field specifiers for better relevance (issue #33)."""
+    with patch("arxiv.Client", return_value=mock_client):
+        with patch("arxiv.Search") as search_mock:
+            # Test multi-word query
+            await handle_search({"query": "quantum computing", "max_results": 1})
+            search_mock.assert_called()
+            assert search_mock.call_args[1]["query"] == "all:quantum AND all:computing"
+
+            # Test single word query
+            search_mock.reset_mock()
+            await handle_search({"query": "transformer", "max_results": 1})
+            assert search_mock.call_args[1]["query"] == "all:transformer"
+
+            # Test query with existing field specifier (should not be modified)
+            search_mock.reset_mock()
+            await handle_search({"query": "ti:neural networks", "max_results": 1})
+            assert search_mock.call_args[1]["query"] == "ti:neural networks"
+
+            # Explicit boolean operators are kept, not turned into search terms
+            search_mock.reset_mock()
+            await handle_search({"query": "quantum OR computing", "max_results": 1})
+            assert search_mock.call_args[1]["query"] == "all:quantum OR all:computing"
+
+            # A word merely containing a prefix ("tau:" contains "au:") is not fielded
+            search_mock.reset_mock()
+            await handle_search({"query": "tau: decay", "max_results": 1})
+            assert search_mock.call_args[1]["query"] == "all:tau: AND all:decay"