[Refactor]: Change search method to BM25 & Dense vector Hybrid search (#119)

* [Refactor]: Change search method to BM25 & Dense vector Hybrid search

* [Restructure]
  1. Refactor codebase to use Context class
  2. Add hybrid mode environment variable

Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com>

---------

Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com>
2025-10-06 01:10:02 +03:00 · 2025-08-05 16:58:20 +08:00
parent 51822f5470
commit 419d40e3aa
11 changed files with 845 additions and 134 deletions
--- a/packages/core/src/context.ts
+++ b/packages/core/src/context.ts
@@ -11,7 +11,10 @@ import {
 import {
    VectorDatabase,
    VectorDocument,
-    VectorSearchResult
+    VectorSearchResult,
+    HybridSearchRequest,
+    HybridSearchOptions,
+    HybridSearchResult
 } from './vectordb';
 import { SemanticSearchResult } from './types';
 import { envManager } from './utils/env-manager';
@@ -152,17 +155,30 @@ export class Context {
    }

    /**
-     * Generate collection name based on codebase path
+     * Get isHybrid setting from environment variable with default true
     */
-    private getCollectionName(codebasePath: string): string {
-        const normalizedPath = path.resolve(codebasePath);
-        const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
-        return `code_chunks_${hash.substring(0, 8)}`;
+    private getIsHybrid(): boolean {
+        // HYBRID_MODE is optional; when it is not set, hybrid search stays enabled.
+        const rawMode = envManager.get('HYBRID_MODE');
+        if (rawMode === undefined || rawMode === null) return true;
+        // Only "true" (any case) enables it; trim so stray whitespace cannot disable hybrid mode.
+        return rawMode.trim().toLowerCase() === 'true';
+    }

    /**
-     * Index entire codebase
-     * @param codebasePath Codebase path
+     * Generate collection name based on codebase path and hybrid mode
+     */
+    private getCollectionName(codebasePath: string): string {
+        // Hybrid and dense-only indexes get different prefixes, hence separate collections.
+        const prefix = this.getIsHybrid() ? 'hybrid_code_chunks' : 'code_chunks';
+        // Suffix: first 8 hex characters of the MD5 digest of the resolved codebase path.
+        const digest = crypto.createHash('md5').update(path.resolve(codebasePath)).digest('hex');
+        return `${prefix}_${digest.substring(0, 8)}`;
+    }
+
+    /**
+     * Index a codebase for semantic search
+     * @param codebasePath Codebase root path
     * @param progressCallback Optional progress callback function
     * @returns Indexing statistics
     */
@@ -170,7 +186,9 @@ export class Context {
        codebasePath: string,
        progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void
    ): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
-        console.log(`🚀 Starting to index codebase: ${codebasePath}`);
+        const isHybrid = this.getIsHybrid();
+        const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
+        console.log(`🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);

        // 1. Load ignore patterns from various ignore files
        await this.loadGitignorePatterns(codebasePath);
@@ -239,7 +257,7 @@ export class Context {
        if (!synchronizer) {
            // Load project-specific ignore patterns before creating FileSynchronizer
            await this.loadGitignorePatterns(codebasePath);
-            
+
            // To be safe, let's initialize if it's not there.
            const newSynchronizer = new FileSynchronizer(codebasePath, this.ignorePatterns);
            await newSynchronizer.initialize();
@@ -317,37 +335,118 @@ export class Context {
    }

    /**
-     * Semantic search
+     * Semantic search with unified implementation
     * @param codebasePath Codebase path to search in
     * @param query Search query
     * @param topK Number of results to return
     * @param threshold Similarity threshold
     */
    async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5): Promise<SemanticSearchResult[]> {
-        console.log(`🔍 Executing semantic search: "${query}" in ${codebasePath}`);
+        const isHybrid = this.getIsHybrid(); // chooses between the hybrid and the dense-only path below
+        const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
+        console.log(`🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);

-        // 1. Generate query vector
-        const queryEmbedding: EmbeddingVector = await this.embedding.embed(query);
+        const collectionName = this.getCollectionName(codebasePath);
+        console.log(`🔍 Using collection: ${collectionName}`);

-        // 2. Search in vector database
-        const searchResults: VectorSearchResult[] = await this.vectorDatabase.search(
-            this.getCollectionName(codebasePath),
-            queryEmbedding.vector,
-            { topK, threshold }
-        );
+        // Return an empty result (instead of failing) when the collection has not been created yet
+        const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
+        if (!hasCollection) {
+            console.log(`⚠️  Collection '${collectionName}' does not exist. Please index the codebase first.`);
+            return [];
+        }

-        // 3. Convert to semantic search result format
-        const results: SemanticSearchResult[] = searchResults.map(result => ({
-            content: result.document.content,
-            relativePath: result.document.relativePath,
-            startLine: result.document.startLine,
-            endLine: result.document.endLine,
-            language: result.document.metadata.language || 'unknown',
-            score: result.score
-        }));
+        if (isHybrid === true) {
+            try {
+                // Probe query (empty filter, limit 1): its result is unused and a failure is only logged
+                const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
+                console.log(`🔍 Collection '${collectionName}' exists and appears to have data`);
+            } catch (error) {
+                console.log(`⚠️  Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
+            }

-        console.log(`✅ Found ${results.length} relevant results`);
-        return results;
+            // 1. Generate query vector
+            console.log(`🔍 Generating embeddings for query: "${query}"`);
+            const queryEmbedding: EmbeddingVector = await this.embedding.embed(query);
+            console.log(`✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
+            console.log(`🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
+
+            // 2. Prepare hybrid search requests: index 0 is the dense request, index 1 the text (sparse) request
+            const searchRequests: HybridSearchRequest[] = [
+                {
+                    data: queryEmbedding.vector,
+                    anns_field: "vector",
+                    param: { "nprobe": 10 },
+                    limit: topK
+                },
+                {
+                    data: query, // raw query text, not an embedding
+                    anns_field: "sparse_vector",
+                    param: { "drop_ratio_search": 0.2 },
+                    limit: topK
+                }
+            ];
+
+            console.log(`🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
+            console.log(`🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
+
+            // 3. Execute hybrid search (note: `threshold` is not passed on this path)
+            console.log(`🔍 Executing hybrid search with RRF reranking...`);
+            const searchResults: HybridSearchResult[] = await this.vectorDatabase.hybridSearch(
+                collectionName,
+                searchRequests,
+                {
+                    rerank: {
+                        strategy: 'rrf',
+                        params: { k: 100 }
+                    },
+                    limit: topK
+                }
+            );
+
+            console.log(`🔍 Raw search results count: ${searchResults.length}`);
+
+            // 4. Convert to semantic search result format
+            const results: SemanticSearchResult[] = searchResults.map(result => ({
+                content: result.document.content,
+                relativePath: result.document.relativePath,
+                startLine: result.document.startLine,
+                endLine: result.document.endLine,
+                language: result.document.metadata.language || 'unknown',
+                score: result.score
+            }));
+
+            console.log(`✅ Found ${results.length} relevant hybrid results`);
+            if (results.length > 0) {
+                console.log(`🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
+            }
+
+            return results;
+        } else {
+            // Dense-only search; this is the only path that passes `threshold` to the database
+            // 1. Generate query vector
+            const queryEmbedding: EmbeddingVector = await this.embedding.embed(query);
+
+            // 2. Search in vector database
+            const searchResults: VectorSearchResult[] = await this.vectorDatabase.search(
+                collectionName,
+                queryEmbedding.vector,
+                { topK, threshold }
+            );
+
+            // 3. Convert to semantic search result format
+            const results: SemanticSearchResult[] = searchResults.map(result => ({
+                content: result.document.content,
+                relativePath: result.document.relativePath,
+                startLine: result.document.startLine,
+                endLine: result.document.endLine,
+                language: result.document.metadata.language || 'unknown',
+                score: result.score
+            }));
+
+            console.log(`✅ Found ${results.length} relevant results`);
+            return results;
+        }
    }

    /**
@@ -458,10 +557,18 @@ export class Context {
     * Prepare vector collection
     */
    private async prepareCollection(codebasePath: string): Promise<void> {
-        // Create new collection
-        console.log(`🔧 Preparing vector collection for codebase: ${codebasePath}`);
+        const isHybrid = this.getIsHybrid();
+        const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
+        console.log(`🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}`);
        const collectionName = this.getCollectionName(codebasePath);

+        // Check if collection already exists
+        const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
+        if (collectionExists) {
+            console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
+            return;
+        }
+
        // For Ollama embeddings, ensure dimension is detected before creating collection
        if (this.embedding.getProvider() === 'Ollama' && typeof (this.embedding as any).initializeDimension === 'function') {
            await (this.embedding as any).initializeDimension();
@@ -469,7 +576,13 @@ export class Context {

        const dimension = this.embedding.getDimension();
        const dirName = path.basename(codebasePath);
-        await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
+
+        if (isHybrid === true) {
+            await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
+        } else {
+            await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
+        }
+
        console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
    }

@@ -517,6 +630,7 @@ export class Context {
        codebasePath: string,
        onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
    ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
+        const isHybrid = this.getIsHybrid();
        const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
        const CHUNK_LIMIT = 450000;
        console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
@@ -551,8 +665,8 @@ export class Context {
                        try {
                            await this.processChunkBuffer(chunkBuffer);
                        } catch (error) {
-                            // TODO: 
-                            console.error(`❌ Failed to process chunk batch: ${error}`);
+                            const searchType = isHybrid === true ? 'hybrid' : 'regular';
+                            console.error(`❌ Failed to process chunk batch for ${searchType}: ${error}`);
                        } finally {
                            chunkBuffer = []; // Always clear buffer, even on failure
                        }
@@ -580,11 +694,12 @@ export class Context {

        // Process any remaining chunks in the buffer
        if (chunkBuffer.length > 0) {
-            console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks`);
+            const searchType = isHybrid === true ? 'hybrid' : 'regular';
+            console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
            try {
                await this.processChunkBuffer(chunkBuffer);
            } catch (error) {
-                console.error(`❌ Failed to process final chunk batch: ${error}`);
+                console.error(`❌ Failed to process final chunk batch for ${searchType}: ${error}`);
            }
        }

@@ -608,7 +723,9 @@ export class Context {
        // Estimate tokens (rough estimation: 1 token ≈ 4 characters)
        const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);

-        console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens)`);
+        const isHybrid = this.getIsHybrid();
+        const searchType = isHybrid === true ? 'hybrid' : 'regular';
+        console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
        await this.processChunkBatch(chunks, codebasePath);
    }

@@ -616,45 +733,75 @@ export class Context {
     * Process a batch of chunks
     */
    private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> {
+        const isHybrid = this.getIsHybrid(); // decides between insertHybrid() and insert() below
+
        // Generate embedding vectors
        const chunkContents = chunks.map(chunk => chunk.content);
-        const embeddings: EmbeddingVector[] = await this.embedding.embedBatch(chunkContents);
+        const embeddings = await this.embedding.embedBatch(chunkContents);

-        // Prepare vector documents
-        const documents: VectorDocument[] = chunks.map((chunk, index) => {
-            if (!chunk.metadata.filePath) {
-                throw new Error(`Missing filePath in chunk metadata at index ${index}`);
-            }
-
-            const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
-            const fileExtension = path.extname(chunk.metadata.filePath);
-
-            // Extract metadata that should be stored separately
-            const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
-
-            return {
-                id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
-                vector: embeddings[index].vector,
-                content: chunk.content,
-                relativePath,
-                startLine: chunk.metadata.startLine || 0,
-                endLine: chunk.metadata.endLine || 0,
-                fileExtension,
-                metadata: {
-                    ...restMetadata,
-                    codebasePath,
-                    language: chunk.metadata.language || 'unknown',
-                    chunkIndex: index
+        if (isHybrid === true) {
+            // Create hybrid vector documents (same fields as the regular branch; stored via insertHybrid)
+            const documents: VectorDocument[] = chunks.map((chunk, index) => {
+                if (!chunk.metadata.filePath) {
+                    throw new Error(`Missing filePath in chunk metadata at index ${index}`);
                }
-            };
-        });

-        // Store to vector database
-        await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
+                const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
+                const fileExtension = path.extname(chunk.metadata.filePath);
+                const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; // pulled out so they are not repeated inside metadata
+
+                return {
+                    id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
+                    content: chunk.content, // Full text content for BM25 and storage
+                    vector: embeddings[index].vector, // Dense vector
+                    relativePath,
+                    startLine: chunk.metadata.startLine || 0,
+                    endLine: chunk.metadata.endLine || 0,
+                    fileExtension,
+                    metadata: {
+                        ...restMetadata,
+                        codebasePath,
+                        language: chunk.metadata.language || 'unknown',
+                        chunkIndex: index // position within this batch
+                    }
+                };
+            });
+
+            // Store to vector database
+            await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
+        } else {
+            // Create regular vector documents (dense-only collection; stored via insert)
+            const documents: VectorDocument[] = chunks.map((chunk, index) => {
+                if (!chunk.metadata.filePath) {
+                    throw new Error(`Missing filePath in chunk metadata at index ${index}`);
+                }
+
+                const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
+                const fileExtension = path.extname(chunk.metadata.filePath);
+                const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; // pulled out so they are not repeated inside metadata
+
+                return {
+                    id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
+                    vector: embeddings[index].vector,
+                    content: chunk.content,
+                    relativePath,
+                    startLine: chunk.metadata.startLine || 0,
+                    endLine: chunk.metadata.endLine || 0,
+                    fileExtension,
+                    metadata: {
+                        ...restMetadata,
+                        codebasePath,
+                        language: chunk.metadata.language || 'unknown',
+                        chunkIndex: index // position within this batch
+                    }
+                };
+            });
+
+            // Store to vector database
+            await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
+        }
    }

-
-
    /**
     * Get programming language based on file extension
     */
--- a/packages/core/src/vectordb/index.ts
+++ b/packages/core/src/vectordb/index.ts
@@ -4,6 +4,10 @@ export {
    SearchOptions,
    VectorSearchResult,
    VectorDatabase,
+    HybridSearchRequest,
+    HybridSearchOptions,
+    HybridSearchResult,
+    RerankStrategy,
    COLLECTION_LIMIT_MESSAGE
 } from './types';

--- a/packages/core/src/vectordb/milvus-restful-vectordb.ts
+++ b/packages/core/src/vectordb/milvus-restful-vectordb.ts
@@ -3,6 +3,9 @@ import {
    SearchOptions,
    VectorSearchResult,
    VectorDatabase,
+    HybridSearchRequest,
+    HybridSearchOptions,
+    HybridSearchResult,
    COLLECTION_LIMIT_MESSAGE
 } from './types';
 import { ClusterManager } from './zilliz-utils';
@@ -467,4 +470,277 @@ export class MilvusRestfulVectorDatabase implements VectorDatabase {
            throw error;
        }
    }
+
+    async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
+        // Creates a hybrid (dense + BM25 sparse) collection, builds its two indexes and loads it.
+        // Failures are logged and rethrown.
+        await this.ensureInitialized(); // same guard insertHybrid() and hybridSearch() use
+
+        try {
+            const restfulConfig = this.config as MilvusRestfulConfig;
+
+            // NOTE: the optional `description` argument is not part of the request built below.
+            const collectionSchema = {
+                collectionName,
+                dbName: restfulConfig.database,
+                schema: {
+                    enableDynamicField: false,
+                    // BM25 function declared in the schema: input "content", output "sparse_vector".
+                    functions: [
+                        {
+                            name: "content_bm25_emb",
+                            description: "content bm25 function",
+                            type: "BM25",
+                            inputFieldNames: ["content"],
+                            outputFieldNames: ["sparse_vector"],
+                            params: {},
+                        },
+                    ],
+                    fields: [
+                        {
+                            fieldName: "id",
+                            dataType: "VarChar",
+                            isPrimary: true,
+                            elementTypeParams: {
+                                max_length: 512
+                            }
+                        },
+                        {
+                            fieldName: "content",
+                            dataType: "VarChar",
+                            elementTypeParams: {
+                                max_length: 65535,
+                                enable_analyzer: true
+                            }
+                        },
+                        {
+                            fieldName: "vector",
+                            dataType: "FloatVector",
+                            elementTypeParams: {
+                                dim: dimension // dimension supplied by the caller
+                            }
+                        },
+                        {
+                            fieldName: "sparse_vector",
+                            dataType: "SparseFloatVector"
+                        },
+                        {
+                            fieldName: "relativePath",
+                            dataType: "VarChar",
+                            elementTypeParams: {
+                                max_length: 1024
+                            }
+                        },
+                        { fieldName: "startLine", dataType: "Int64" },
+                        { fieldName: "endLine", dataType: "Int64" },
+                        {
+                            fieldName: "fileExtension",
+                            dataType: "VarChar",
+                            elementTypeParams: {
+                                max_length: 32
+                            }
+                        },
+                        {
+                            fieldName: "metadata", // insertHybrid() stores JSON.stringify(doc.metadata) here
+                            dataType: "VarChar",
+                            elementTypeParams: {
+                                max_length: 65535
+                            }
+                        }
+                    ]
+                }
+            };
+
+            // Step 1: Create collection with schema and functions
+            await createCollectionWithLimitCheck(this.makeRequest.bind(this), collectionSchema);
+
+            // Step 2: Create indexes for both vector fields
+            await this.createHybridIndexes(collectionName);
+
+            // Step 3: Load collection to memory for searching
+            await this.loadCollection(collectionName);
+
+        } catch (error) {
+            console.error(`❌ Failed to create hybrid collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    private async createHybridIndexes(collectionName: string): Promise<void> {
+        // Creates the dense ("vector") and sparse ("sparse_vector") indexes of a hybrid collection.
+        // Any failure is logged and rethrown.
+        try {
+            const { database } = this.config as MilvusRestfulConfig;
+
+            // One index per vector field. Each gets its own /indexes/create request,
+            // sent one after the other with the dense index first.
+            const indexSpecs = [
+                {
+                    fieldName: "vector", // dense embedding field
+                    indexName: "vector_index",
+                    metricType: "COSINE",
+                    index_type: "AUTOINDEX"
+                },
+                {
+                    fieldName: "sparse_vector", // sparse field, indexed with the BM25 metric
+                    indexName: "sparse_vector_index",
+                    metricType: "BM25",
+                    index_type: "SPARSE_INVERTED_INDEX"
+                }
+            ];
+
+            for (const indexSpec of indexSpecs) {
+                // Request body layout: collectionName, dbName, then a single-element indexParams list.
+                const createIndexRequest = {
+                    collectionName,
+                    dbName: database,
+                    indexParams: [indexSpec]
+                };
+                await this.makeRequest('/indexes/create', 'POST', createIndexRequest);
+            }
+
+        } catch (error) {
+            // Log with the collection name for context, then let the caller handle the failure.
+            console.error(`❌ Failed to create hybrid indexes for collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> {
+        await this.ensureInitialized();
+
+        try {
+            const { database } = this.config as MilvusRestfulConfig;
+
+            // One row per document. No sparse_vector value is supplied here, and metadata is
+            // serialized to a JSON string before being sent.
+            const rows = documents.map(({ id, content, vector, relativePath, startLine, endLine, fileExtension, metadata }) => ({
+                id,
+                content,
+                vector,
+                relativePath,
+                startLine,
+                endLine,
+                fileExtension,
+                metadata: JSON.stringify(metadata),
+            }));
+
+            const payload = { collectionName, dbName: database, data: rows };
+            const response = await this.makeRequest('/entities/insert', 'POST', payload);
+
+            // A non-zero response code is treated as a failed insert.
+            const succeeded = response.code === 0;
+            if (!succeeded) {
+                throw new Error(`Insert failed: ${response.message || 'Unknown error'}`);
+            }
+
+        } catch (error) {
+            // Log with the collection name, then propagate to the caller.
+            console.error(`❌ Failed to insert hybrid documents to collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
+
+    async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> {
+        await this.ensureInitialized();
+        // Sends one dense and one BM25 sub-search to /entities/hybrid_search and maps the returned rows.
+        try {
+            const restfulConfig = this.config as MilvusRestfulConfig;
+
+            console.log(`🔍 Preparing hybrid search for collection: ${collectionName}`);
+
+            // Positional contract: searchRequests[0] is the dense request, searchRequests[1] the sparse (BM25) one.
+            if (searchRequests.length < 2) {
+                throw new Error(`Hybrid search requires a dense and a sparse search request, got ${searchRequests.length}`);
+            }
+
+            // For dense vector search - data must be array of vectors: [[0.1, 0.2, 0.3, ...]]
+            const search_param_1 = {
+                data: Array.isArray(searchRequests[0].data) ? [searchRequests[0].data] : [[searchRequests[0].data]],
+                annsField: searchRequests[0].anns_field, // "vector"
+                limit: searchRequests[0].limit,
+                outputFields: ["*"],
+                searchParams: {
+                    metricType: "COSINE",
+                    params: searchRequests[0].param || { "nprobe": 10 }
+                }
+            };
+
+            // For sparse vector search - data must be array of queries: ["query text"]
+            const search_param_2 = {
+                data: Array.isArray(searchRequests[1].data) ? searchRequests[1].data : [searchRequests[1].data],
+                annsField: searchRequests[1].anns_field, // "sparse_vector"
+                limit: searchRequests[1].limit,
+                outputFields: ["*"],
+                searchParams: {
+                    metricType: "BM25",
+                    params: searchRequests[1].param || { "drop_ratio_search": 0.2 }
+                }
+            };
+
+            // Use the caller's rerank settings when provided; otherwise default to RRF with k = 100.
+            const rerank_strategy = options?.rerank ?? { strategy: "rrf", params: { k: 100 } };
+
+            console.log(`🔍 Dense search params:`, JSON.stringify({
+                annsField: search_param_1.annsField,
+                limit: search_param_1.limit,
+                data_length: Array.isArray(search_param_1.data[0]) ? search_param_1.data[0].length : 'N/A',
+                searchParams: search_param_1.searchParams
+            }, null, 2));
+            console.log(`🔍 Sparse search params:`, JSON.stringify({
+                annsField: search_param_2.annsField,
+                limit: search_param_2.limit,
+                query_text: typeof search_param_2.data[0] === 'string' ? search_param_2.data[0].substring(0, 50) + '...' : 'N/A',
+                searchParams: search_param_2.searchParams
+            }, null, 2));
+
+            const hybridSearchRequest = {
+                collectionName,
+                dbName: restfulConfig.database,
+                search: [search_param_1, search_param_2],
+                rerank: rerank_strategy,
+                limit: options?.limit || searchRequests[0]?.limit || 10, // final result count after reranking
+                outputFields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'],
+            };
+
+            console.log(`🔍 Complete REST API request:`, JSON.stringify({
+                collectionName: hybridSearchRequest.collectionName,
+                dbName: hybridSearchRequest.dbName,
+                search_count: hybridSearchRequest.search.length,
+                rerank: hybridSearchRequest.rerank,
+                limit: hybridSearchRequest.limit,
+                outputFields: hybridSearchRequest.outputFields
+            }, null, 2));
+
+            console.log(`🔍 Executing REST API hybrid search...`);
+            const response = await this.makeRequest('/entities/hybrid_search', 'POST', hybridSearchRequest);
+
+            if (response.code !== 0) {
+                throw new Error(`Hybrid search failed: ${response.message || 'Unknown error'}`);
+            }
+
+            const results = response.data || [];
+            console.log(`✅ Found ${results.length} results from hybrid search`);
+
+            // Transform response to HybridSearchResult format
+            return results.map((result: any) => ({
+                document: {
+                    id: result.id,
+                    content: result.content,
+                    vector: [], // Vector not returned in search results
+                    sparse_vector: [], // Vector not returned in search results
+                    relativePath: result.relativePath,
+                    startLine: result.startLine,
+                    endLine: result.endLine,
+                    fileExtension: result.fileExtension,
+                    metadata: JSON.parse(result.metadata || '{}'), // metadata is stored as a JSON string
+                },
+                score: result.score || result.distance || 0,
+            }));
+
+        } catch (error) {
+            console.error(`❌ Failed to perform hybrid search on collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
 }
--- a/packages/core/src/vectordb/milvus-vectordb.ts
+++ b/packages/core/src/vectordb/milvus-vectordb.ts
@@ -1,9 +1,12 @@
-import { MilvusClient, DataType, MetricType } from '@zilliz/milvus2-sdk-node';
+import { MilvusClient, DataType, MetricType, FunctionType } from '@zilliz/milvus2-sdk-node';
 import {
    VectorDocument,
    SearchOptions,
    VectorSearchResult,
    VectorDatabase,
+    HybridSearchRequest,
+    HybridSearchOptions,
+    HybridSearchResult,
    COLLECTION_LIMIT_MESSAGE
 } from './types';
 import { ClusterManager } from './zilliz-utils';
@@ -298,4 +301,231 @@ export class MilvusVectorDatabase implements VectorDatabase {
            throw error;
        }
    }
+
+    /**
+     * Create a collection for hybrid (dense + BM25) retrieval.
+     *
+     * Declares the chunk fields, registers a BM25 function that maps `content` to
+     * `sparse_vector`, indexes both vector fields and loads the collection.
+     *
+     * @param collectionName Collection name
+     * @param dimension Dense vector dimension
+     * @param description Collection description; defaults to one derived from the name
+     */
+    async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
+        await this.ensureInitialized();
+
+        console.log('Beginning hybrid collection creation:', collectionName);
+        console.log('Collection dimension:', dimension);
+
+        const fieldSchemas = [
+            {
+                name: 'id',
+                description: 'Document ID',
+                data_type: DataType.VarChar,
+                max_length: 512,
+                is_primary_key: true,
+            },
+            {
+                name: 'content',
+                description: 'Full text content for BM25 and storage',
+                data_type: DataType.VarChar,
+                max_length: 65535,
+                enable_analyzer: true, // this field is the BM25 function's input
+            },
+            {
+                name: 'vector',
+                description: 'Dense vector embedding',
+                data_type: DataType.FloatVector,
+                dim: dimension,
+            },
+            {
+                name: 'sparse_vector', // output of the BM25 function declared below
+                description: 'Sparse vector embedding from BM25',
+                data_type: DataType.SparseFloatVector,
+            },
+            {
+                name: 'relativePath',
+                description: 'Relative path to the codebase',
+                data_type: DataType.VarChar,
+                max_length: 1024,
+            },
+            {
+                name: 'startLine',
+                description: 'Start line number of the chunk',
+                data_type: DataType.Int64,
+            },
+            {
+                name: 'endLine',
+                description: 'End line number of the chunk',
+                data_type: DataType.Int64,
+            },
+            {
+                name: 'fileExtension',
+                description: 'File extension',
+                data_type: DataType.VarChar,
+                max_length: 32,
+            },
+            {
+                name: 'metadata', // serialized with JSON.stringify on insert
+                description: 'Additional document metadata as JSON string',
+                data_type: DataType.VarChar,
+                max_length: 65535,
+            },
+        ];
+
+        // BM25 function: derives `sparse_vector` from the `content` text
+        const bm25Functions = [
+            {
+                name: "content_bm25_emb",
+                description: "content bm25 function",
+                type: FunctionType.BM25,
+                input_field_names: ["content"],
+                output_field_names: ["sparse_vector"],
+                params: {},
+            },
+        ];
+
+        const collectionRequest = {
+            collection_name: collectionName,
+            description: description || `Hybrid code context collection: ${collectionName}`,
+            fields: fieldSchemas,
+            functions: bm25Functions,
+        };
+        await createCollectionWithLimitCheck(this.client!, collectionRequest);
+
+        // One index per vector field: cosine AUTOINDEX for dense, BM25 inverted index for sparse
+        const denseIndex = {
+            collection_name: collectionName,
+            field_name: 'vector',
+            index_type: 'AUTOINDEX',
+            metric_type: MetricType.COSINE,
+        };
+        const sparseIndex = {
+            collection_name: collectionName,
+            field_name: 'sparse_vector',
+            index_type: 'SPARSE_INVERTED_INDEX',
+            metric_type: MetricType.BM25,
+        };
+        await this.client!.createIndex(denseIndex);
+        await this.client!.createIndex(sparseIndex);
+
+        // Load the collection, then describe it as a final check that it exists
+        await this.client!.loadCollection({ collection_name: collectionName });
+        await this.client!.describeCollection({ collection_name: collectionName });
+    }
+
+    /** Insert documents into a hybrid collection; `sparse_vector` is not sent because the collection's BM25 function derives it from `content`. Throws if Milvus reports a non-success status. */
+    async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> {
+        await this.ensureInitialized();
+        if (documents.length === 0) return; // nothing to write; skip the round-trip
+
+        const data = documents.map(doc => ({
+            id: doc.id,
+            content: doc.content,
+            vector: doc.vector,
+            relativePath: doc.relativePath,
+            startLine: doc.startLine,
+            endLine: doc.endLine,
+            fileExtension: doc.fileExtension,
+            metadata: JSON.stringify(doc.metadata ?? {}), // keep the field present; readers default to '{}'
+        }));
+
+        const result = await this.client!.insert({ collection_name: collectionName, data });
+        if (result.status.error_code !== 'Success') throw new Error(`Hybrid insert failed: ${result.status.reason}`);
+    }
+
+    /**
+     * Hybrid search: fuses a dense ANN request (searchRequests[0]) with a BM25 text request (searchRequests[1]);
+     * throws if either is missing. Reranks with options.rerank when given, otherwise RRF with k = 100.
+     */
+    async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> {
+        await this.ensureInitialized();
+
+        try {
+            console.log(`🔍 Preparing hybrid search for collection: ${collectionName}`);
+            if (!Array.isArray(searchRequests) || searchRequests.length < 2) { // avoid an opaque TypeError below
+                throw new Error('Hybrid search requires two search requests: [dense vector, sparse text]');
+            }
+
+            // Prepare search requests in the correct Milvus format
+            const search_param_1 = {
+                data: Array.isArray(searchRequests[0].data) ? searchRequests[0].data : [searchRequests[0].data],
+                anns_field: searchRequests[0].anns_field, // "vector"
+                param: searchRequests[0].param, // {"nprobe": 10}
+                limit: searchRequests[0].limit
+            };
+
+            const search_param_2 = {
+                data: searchRequests[1].data, // query text for sparse search
+                anns_field: searchRequests[1].anns_field, // "sparse_vector"
+                param: searchRequests[1].param, // {"drop_ratio_search": 0.2}
+                limit: searchRequests[1].limit
+            };
+
+            // Honor the caller's rerank strategy; default to RRF with k = 100
+            const rerank_strategy: { strategy: string; params: Record<string, any> } = options?.rerank
+                ? { strategy: options.rerank.strategy, params: options.rerank.params ?? (options.rerank.strategy === 'rrf' ? { k: 100 } : {}) }
+                : { strategy: 'rrf', params: { k: 100 } };
+
+            console.log(`🔍 Dense search params:`, JSON.stringify({
+                anns_field: search_param_1.anns_field,
+                param: search_param_1.param,
+                limit: search_param_1.limit,
+                data_length: Array.isArray(search_param_1.data[0]) ? search_param_1.data[0].length : search_param_1.data.length
+            }, null, 2));
+            console.log(`🔍 Sparse search params:`, JSON.stringify({
+                anns_field: search_param_2.anns_field,
+                param: search_param_2.param,
+                limit: search_param_2.limit,
+                query_text: typeof search_param_2.data === 'string' ? search_param_2.data.substring(0, 50) + '...' : 'N/A'
+            }, null, 2));
+            console.log(`🔍 Rerank strategy:`, JSON.stringify(rerank_strategy, null, 2));
+
+            // Execute hybrid search using the correct client.search format
+            const searchParams = {
+                collection_name: collectionName,
+                data: [search_param_1, search_param_2],
+                limit: options?.limit || searchRequests[0]?.limit || 10,
+                rerank: rerank_strategy,
+                output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'],
+            };
+
+            console.log(`🔍 Complete search request:`, JSON.stringify({
+                collection_name: searchParams.collection_name,
+                data_count: searchParams.data.length,
+                limit: searchParams.limit,
+                rerank: searchParams.rerank,
+                output_fields: searchParams.output_fields
+            }, null, 2));
+
+            const searchResult = await this.client!.search(searchParams);
+            console.log(`🔍 Search executed, processing results...`);
+
+            if (!searchResult.results || searchResult.results.length === 0) {
+                console.log(`⚠️  No results returned from Milvus search`);
+                return [];
+            }
+            console.log(`✅ Found ${searchResult.results.length} results from hybrid search`);
+
+            // Transform results to HybridSearchResult format (vectors are not requested, so they stay empty)
+            return searchResult.results.map((result: any) => ({
+                document: {
+                    id: result.id,
+                    content: result.content,
+                    vector: [],
+                    sparse_vector: [],
+                    relativePath: result.relativePath,
+                    startLine: result.startLine,
+                    endLine: result.endLine,
+                    fileExtension: result.fileExtension,
+                    metadata: JSON.parse(result.metadata || '{}'),
+                },
+                score: result.score,
+            }));
+        } catch (error) {
+            console.error(`❌ Failed to perform hybrid search on collection '${collectionName}':`, error);
+            throw error;
+        }
+    }
 }
--- a/packages/core/src/vectordb/types.ts
+++ b/packages/core/src/vectordb/types.ts
@@ -16,11 +16,34 @@ export interface SearchOptions {
    threshold?: number;
 }

+// New interfaces for hybrid search
+export interface HybridSearchRequest { // One per-field sub-search of a hybrid query
+    data: number[] | string; // Query vector or text
+    anns_field: string; // Vector field name (vector or sparse_vector)
+    param: Record<string, any>; // Search parameters
+    limit: number; // Maximum hits requested from this sub-search
+}
+
+export interface HybridSearchOptions { // Query-level options for hybridSearch
+    rerank?: RerankStrategy; // How the per-request result lists are merged
+    limit?: number; // Maximum number of merged results to return
+}
+
+export interface RerankStrategy { // Reranker selection for hybrid search
+    strategy: 'rrf' | 'weighted'; // Reranker identifier
+    params?: Record<string, any>; // Strategy-specific settings, e.g. { k: 100 } for 'rrf'
+}
+
 export interface VectorSearchResult {
    document: VectorDocument;
    score: number;
 }

+export interface HybridSearchResult { // One hit returned by hybridSearch
+    document: VectorDocument; // Matched document
+    score: number; // Score reported by the backend for this hit
+}
+
 export interface VectorDatabase {
    /**
     * Create collection
@@ -30,6 +53,14 @@ export interface VectorDatabase {
     */
    createCollection(collectionName: string, dimension: number, description?: string): Promise<void>;

+    /**
+     * Create collection with hybrid search support
+     * @param collectionName Collection name
+     * @param dimension Dense vector dimension
+     * @param description Collection description
+     */
+    createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void>;
+
    /**
     * Drop collection
     * @param collectionName Collection name
@@ -54,6 +85,13 @@ export interface VectorDatabase {
     */
    insert(collectionName: string, documents: VectorDocument[]): Promise<void>;

+    /**
+     * Insert hybrid vector documents
+     * @param collectionName Collection name
+     * @param documents Document array
+     */
+    insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void>;
+
    /**
     * Search similar vectors
     * @param collectionName Collection name
@@ -62,6 +100,14 @@ export interface VectorDatabase {
     */
    search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]>;

+    /**
+     * Hybrid search with multiple vector fields
+     * @param collectionName Collection name
+     * @param searchRequests Array of search requests for different fields
+     * @param options Hybrid search options including reranking
+     */
+    hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]>;
+
    /**
     * Delete documents
     * @param collectionName Collection name
@@ -70,11 +116,11 @@ export interface VectorDatabase {
    delete(collectionName: string, ids: string[]): Promise<void>;

    /**
-     * Query documents by filter
+     * Query documents with filter conditions
     * @param collectionName Collection name
-     * @param filter Filter expression string
+     * @param filter Filter expression
     * @param outputFields Fields to return
-     * @param limit Maximum number of results to return (optional)
+     * @param limit Maximum number of results
     */
    query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]>;
 }
--- a/packages/mcp/src/handlers.ts
+++ b/packages/mcp/src/handlers.ts
@@ -61,8 +61,8 @@ export class ToolHandlers {
            // Check each collection for codebase path
            for (const collectionName of collections) {
                try {
-                    // Skip collections that don't match the code_chunks pattern
-                    if (!collectionName.startsWith('code_chunks_')) {
+                    // Skip collections that don't match the code_chunks pattern (support both legacy and new collections)
+                    if (!collectionName.startsWith('code_chunks_') && !collectionName.startsWith('hybrid_code_chunks_')) {
                        console.log(`[SYNC-CLOUD] ⏭️  Skipping non-code collection: ${collectionName}`);
                        continue;
                    }
@@ -218,38 +218,19 @@ export class ToolHandlers {

            // CRITICAL: Pre-index collection creation validation
            try {
-                const normalizedPath = path.resolve(absolutePath);
-                const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
-                const collectionName = `code_chunks_${hash.substring(0, 8)}`;
+                console.log(`[INDEX-VALIDATION] 🔍 Validating collection creation capability`);

-                console.log(`[INDEX-VALIDATION] 🔍 Validating collection creation for: ${collectionName}`);
-
-                // Get embedding dimension for collection creation
-                const embeddingProvider = this.context['embedding'];
-                const dimension = embeddingProvider.getDimension();
-
-                // If force reindex, clear existing collection first
-                if (forceReindex) {
-                    console.log(`[INDEX-VALIDATION] 🧹 Force reindex enabled, clearing existing collection: ${collectionName}`);
-                    try {
-                        await this.context['vectorDatabase'].dropCollection(collectionName);
-                        console.log(`[INDEX-VALIDATION] ✅ Existing collection cleared: ${collectionName}`);
-                    } catch (dropError: any) {
-                        // Collection might not exist, which is fine
-                        console.log(`[INDEX-VALIDATION] ℹ️  Collection ${collectionName} does not exist or already cleared`);
-                    }
+                // Check for an existing index; collection creation itself is handled entirely by context.ts
+                const hasExistingIndex = await this.context.hasIndex(absolutePath);
+                if (hasExistingIndex && forceReindex) {
+                    console.log(`[INDEX-VALIDATION] ℹ️  Force reindex enabled, existing index will be cleared`);
+                    await this.context.clearIndex(absolutePath);
+                    console.log(`[INDEX-VALIDATION] ✅ Existing index cleared for re-indexing`);
+                } else if (hasExistingIndex) {
+                    console.log(`[INDEX-VALIDATION] ℹ️  Index already exists for this codebase`);
                }

-                // Attempt to create collection - this will throw COLLECTION_LIMIT_MESSAGE if limit reached
-                await this.context['vectorDatabase'].createCollection(
-                    collectionName,
-                    dimension,
-                    `Claude Context collection: ${collectionName}`
-                );
-
-                // If creation succeeds, immediately drop the test collection
-                await this.context['vectorDatabase'].dropCollection(collectionName);
-                console.log(`[INDEX-VALIDATION] ✅ Collection creation validated successfully`);
+                console.log(`[INDEX-VALIDATION] ✅  Collection creation validation completed`);

            } catch (validationError: any) {
                const errorMessage = typeof validationError === 'string' ? validationError :
@@ -352,14 +333,9 @@ export class ToolHandlers {
                console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`);
            }

-            // Generate collection name
-            const normalizedPath = path.resolve(absolutePath);
-            const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
-            const collectionName = `code_chunks_${hash.substring(0, 8)}`;
-
            // Load ignore patterns from files first (including .ignore, .gitignore, etc.)
            await this.context['loadGitignorePatterns'](absolutePath);
-            
+
            // Initialize file synchronizer with proper ignore patterns (including project-specific patterns)
            const { FileSynchronizer } = await import("@zilliz/claude-context-core");
            const ignorePatterns = this.context['ignorePatterns'] || [];
@@ -367,7 +343,9 @@ export class ToolHandlers {
            const synchronizer = new FileSynchronizer(absolutePath, ignorePatterns);
            await synchronizer.initialize();

-            // Store synchronizer in the context's internal map
+            // Store synchronizer in the context (let context manage collection names)
+            await this.context['prepareCollection'](absolutePath);
+            const collectionName = this.context['getCollectionName'](absolutePath);
            this.context['synchronizers'].set(collectionName, synchronizer);
            if (contextForThisTask !== this.context) {
                contextForThisTask['synchronizers'].set(collectionName, synchronizer);
@@ -471,7 +449,7 @@ export class ToolHandlers {

            // Log embedding provider information before search
            const embeddingProvider = this.context['embedding'];
-            console.log(`[SEARCH] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} for semantic search`);
+            console.log(`[SEARCH] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} for search`);
            console.log(`[SEARCH] 🔍 Generating embeddings for query using ${embeddingProvider.getProvider()}...`);

            // Search in the specified codebase
@@ -505,7 +483,7 @@ export class ToolHandlers {

                return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` +
                    `   Location: ${location}\n` +
-                    `   Score: ${result.score.toFixed(3)}\n` +
+                    `   Rank: ${index + 1}\n` +
                    `   Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`;
            }).join('\n');

--- a/packages/mcp/src/sync.ts
+++ b/packages/mcp/src/sync.ts
@@ -1,5 +1,5 @@
 import * as fs from "fs";
-import { Context } from "@zilliz/claude-context-core";
+import { Context, FileSynchronizer } from "@zilliz/claude-context-core";
 import { SnapshotManager } from "./snapshot.js";

 export class SyncManager {
@@ -79,6 +79,11 @@ export class SyncManager {
                    console.error(`[SYNC-DEBUG] Error syncing codebase '${codebasePath}' after ${codebaseElapsed}ms:`, error);
                    console.error(`[SYNC-DEBUG] Error stack:`, error.stack);

+                    if (error.message.includes('Failed to query Milvus')) {
+                        // The collection may have been deleted manually; delete the stale snapshot file
+                        await FileSynchronizer.deleteSnapshot(codebasePath);
+                    }
+
                    // Log additional error details
                    if (error.code) {
                        console.error(`[SYNC-DEBUG] Error code: ${error.code}`);
--- a/packages/vscode-extension/src/commands/indexCommand.ts
+++ b/packages/vscode-extension/src/commands/indexCommand.ts
@@ -1,7 +1,6 @@
 import * as vscode from 'vscode';
 import { Context } from '@zilliz/claude-context-core';
 import * as path from 'path';
-import * as crypto from 'crypto';

 export class IndexCommand {
    private context: Context;
@@ -78,10 +77,9 @@ export class IndexCommand {
                const { FileSynchronizer } = await import("@zilliz/claude-context-core");
                const synchronizer = new FileSynchronizer(selectedFolder.uri.fsPath, this.context['ignorePatterns'] || []);
                await synchronizer.initialize();
-                // Store synchronizer in the context's internal map using the same collection name generation logic
-                const normalizedPath = path.resolve(selectedFolder.uri.fsPath);
-                const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
-                const collectionName = `code_chunks_${hash.substring(0, 8)}`;
+                // Store synchronizer in the context's internal map using the collection name from context
+                await this.context['prepareCollection'](selectedFolder.uri.fsPath);
+                const collectionName = this.context['getCollectionName'](selectedFolder.uri.fsPath);
                this.context['synchronizers'].set(collectionName, synchronizer);

                // Start indexing with progress callback
--- a/packages/vscode-extension/src/commands/searchCommand.ts
+++ b/packages/vscode-extension/src/commands/searchCommand.ts
@@ -52,13 +52,25 @@ export class SearchCommand {
                }
                const codebasePath = workspaceFolders[0].uri.fsPath;

-                // Use the new semantic search service
+                // Check if index exists
+                progress.report({ increment: 20, message: 'Checking index...' });
+                const hasIndex = await this.context.hasIndex(codebasePath);
+
+                if (!hasIndex) {
+                    vscode.window.showErrorMessage('Index not found. Please index the codebase first.');
+                    return;
+                }
+
+                // Use semantic search
                const query: SearchQuery = {
                    term: searchTerm,
                    includeContent: true,
                    limit: 20
                };

+                console.log('🔍 Using semantic search...');
+                progress.report({ increment: 50, message: 'Executing semantic search...' });
+
                const results = await this.context.semanticSearch(
                    codebasePath,
                    query.term,
@@ -66,7 +78,7 @@ export class SearchCommand {
                    0.3 // similarity threshold
                );

-                progress.report({ increment: 100, message: 'Semantic search complete!' });
+                progress.report({ increment: 100, message: 'Search complete!' });

                if (results.length === 0) {
                    vscode.window.showInformationMessage(`No results found for "${searchTerm}"`);
@@ -77,7 +89,7 @@ export class SearchCommand {
                const quickPickItems = this.generateQuickPickItems(results, searchTerm, codebasePath);

                const selected = await vscode.window.showQuickPick(quickPickItems, {
-                    placeHolder: `Found ${results.length} results for "${searchTerm}"`,
+                    placeHolder: `Found ${results.length} results for "${searchTerm}" using semantic search`,
                    matchOnDescription: true,
                    matchOnDetail: true
                });
@@ -88,8 +100,8 @@ export class SearchCommand {
            });

        } catch (error) {
-            console.error('Semantic search failed:', error);
-            vscode.window.showErrorMessage(`Semantic search failed: ${error}`);
+            console.error('Search failed:', error);
+            vscode.window.showErrorMessage(`Search failed: ${error}. Please ensure the codebase is indexed.`);
        }
    }

@@ -135,7 +147,13 @@ export class SearchCommand {
        }
        const codebasePath = workspaceFolders[0].uri.fsPath;

-        // Use the semantic search service
+        // Check if index exists
+        const hasIndex = await this.context.hasIndex(codebasePath);
+        if (!hasIndex) {
+            throw new Error('Index not found. Please index the codebase first.');
+        }
+
+        console.log('🔍 Using semantic search for webview...');
        return await this.context.semanticSearch(
            codebasePath,
            searchTerm,
@@ -148,23 +166,31 @@ export class SearchCommand {
     * Check if index exists for the given codebase path
     */
    async hasIndex(codebasePath: string): Promise<boolean> {
-        return await this.context.hasIndex(codebasePath);
+        try {
+            return await this.context.hasIndex(codebasePath);
+        } catch (error) {
+            console.error('Error checking index existence:', error);
+            return false;
+        }
    }

    /**
     * Generate quick pick items for VS Code
     */
    private generateQuickPickItems(results: SemanticSearchResult[], searchTerm: string, workspaceRoot?: string) {
-        return results.slice(0, 20).map(result => {
+        return results.slice(0, 20).map((result, index) => {
            let displayPath = result.relativePath;
            // Truncate content for display
            const truncatedContent = result.content.length <= 150
                ? result.content
                : result.content.substring(0, 150) + '...';

+            // Add rank info to description
+            const rankText = ` (rank: ${index + 1})`;
+
            return {
                label: `$(file-code) ${displayPath}`,
-                description: `1 match in ${displayPath}`,
+                description: `$(search) semantic search${rankText}`,
                detail: truncatedContent,
                result: result
            };
--- a/packages/vscode-extension/src/webview/scripts/semanticSearch.js
+++ b/packages/vscode-extension/src/webview/scripts/semanticSearch.js
@@ -197,7 +197,7 @@ class SemanticSearchController {
            this.resultsList.innerHTML = '<div class="no-results">No matches found</div>';
        } else {
            this.resultsHeader.textContent = `${results.length} result${results.length === 1 ? '' : 's'} for "${query}"`;
-            this.resultsList.innerHTML = results.map(result => this.createResultHTML(result)).join('');
+            this.resultsList.innerHTML = results.map((result, index) => this.createResultHTML(result, index + 1)).join('');
        }
        this.resultsContainer.style.display = 'block';
    }
@@ -205,9 +205,10 @@ class SemanticSearchController {
    /**
     * Create HTML for a single result item
     * @param {Object} result - Result object
+     * @param {number} rank - Result rank (1-indexed)
     * @returns {string} HTML string
     */
-    createResultHTML(result) {
+    createResultHTML(result, rank) {
        return `
            <div class="result-item" onclick="searchController.openFile('${result.relativePath}', ${result.line}, ${result.startLine}, ${result.endLine})">
                <div class="result-file">
@@ -216,7 +217,7 @@ class SemanticSearchController {
                </div>
                <div class="result-preview">${result.preview}</div>
                <div class="result-context">${result.context}</div>
-                ${result.score ? `<div class="result-score" style="margin-top: 8px; text-align: right;">Similarity: ${(result.score * 100).toFixed(1)}%</div>` : ''}
+                <div class="result-rank" style="margin-top: 8px; text-align: right;">Rank: ${rank}</div>
            </div>
        `;
    }
--- a/packages/vscode-extension/src/webview/styles/semanticSearch.css
+++ b/packages/vscode-extension/src/webview/styles/semanticSearch.css
@@ -184,7 +184,7 @@ body {
    white-space: nowrap;
 }

-.result-score {
+.result-rank {
    font-size: 10px;
    color: var(--vscode-descriptionForeground);
    background-color: var(--vscode-badge-background);