Merge pull request #34 from blazickjp/fix-search-relevance-issue-33

Fix search returning irrelevant results when sorted by date
This commit is contained in:
Joe Blazick
2025-06-05 18:42:37 -07:00
committed by GitHub
2 changed files with 43 additions and 1 deletions

View File

@@ -58,13 +58,34 @@ def _process_paper(paper: arxiv.Result) -> Dict[str, Any]:
async def handle_search(arguments: Dict[str, Any]) -> List[types.TextContent]:
"""Handle paper search requests."""
"""Handle paper search requests.
Automatically adds field specifiers to plain queries for better relevance.
This fixes issue #33 where queries sorted by date returned irrelevant results.
"""
try:
client = arxiv.Client()
max_results = min(int(arguments.get("max_results", 10)), settings.MAX_RESULTS)
# Build search query with category filtering
query = arguments["query"]
# Add field specifier if not already present
# This ensures the query actually searches the content
if not any(field in query for field in ["all:", "ti:", "abs:", "au:", "cat:"]):
# Convert plain query to use all: field for better results
# Handle quoted phrases
if '"' in query:
# Keep quoted phrases intact
query = f"all:{query}"
else:
# For unquoted multi-word queries, use AND operator
terms = query.split()
if len(terms) > 1:
query = " AND ".join(f"all:{term}" for term in terms)
else:
query = f"all:{query}"
if categories := arguments.get("categories"):
category_filter = " OR ".join(f"cat:{cat}" for cat in categories)
query = f"({query}) AND ({category_filter})"

View File

@@ -60,3 +60,24 @@ async def test_search_with_invalid_dates(mock_client):
)
assert result[0].text.startswith("Error: Invalid date format")
@pytest.mark.asyncio
async def test_search_query_field_specifier_fix(mock_client):
    """Verify the query string that handle_search hands to arxiv.Search (issue #33).

    Three inputs are exercised in sequence against a patched ``arxiv.Search``:
    a plain multi-word query, a plain single-word query, and a query that
    already carries a field specifier. Only the ``query`` keyword argument of
    the most recent ``arxiv.Search`` call is inspected.
    """
    with patch("arxiv.Client", return_value=mock_client), patch("arxiv.Search") as search_cls:
        # Plain multi-word query: each term is prefixed and the terms are AND-ed.
        await handle_search({"query": "quantum computing", "max_results": 1})
        search_cls.assert_called()
        sent_query = search_cls.call_args[1]["query"]
        assert sent_query == "all:quantum AND all:computing"

        # Plain single-word query: the lone term gets the all: prefix.
        search_cls.reset_mock()
        await handle_search({"query": "transformer", "max_results": 1})
        sent_query = search_cls.call_args[1]["query"]
        assert sent_query == "all:transformer"

        # Query that already names a field: must be passed through untouched.
        search_cls.reset_mock()
        await handle_search({"query": "ti:neural networks", "max_results": 1})
        sent_query = search_cls.call_args[1]["query"]
        assert sent_query == "ti:neural networks"