mirror of
https://github.com/zilliztech/claude-context.git
synced 2025-10-06 01:10:02 +03:00
[Refactor]: Change search method to BM25 & Dense vector Hybrid search (#119)
* [Refactor]: Change search method to BM25 & Dense vector Hybrid search * [Restructure] 1.Refactor codebase to use Context class 2.Add hybrid mode environment variable Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com> --------- Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com>
This commit is contained in:
@@ -11,7 +11,10 @@ import {
|
|||||||
import {
|
import {
|
||||||
VectorDatabase,
|
VectorDatabase,
|
||||||
VectorDocument,
|
VectorDocument,
|
||||||
VectorSearchResult
|
VectorSearchResult,
|
||||||
|
HybridSearchRequest,
|
||||||
|
HybridSearchOptions,
|
||||||
|
HybridSearchResult
|
||||||
} from './vectordb';
|
} from './vectordb';
|
||||||
import { SemanticSearchResult } from './types';
|
import { SemanticSearchResult } from './types';
|
||||||
import { envManager } from './utils/env-manager';
|
import { envManager } from './utils/env-manager';
|
||||||
@@ -152,17 +155,30 @@ export class Context {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Resolve whether hybrid mode is enabled.
 * Reads the `HYBRID_MODE` setting through `envManager`; when the setting is
 * absent the mode defaults to enabled.
 * @returns true when HYBRID_MODE is unset or equals "true" (case-insensitive), false otherwise
 */
private getIsHybrid(): boolean {
    const rawSetting = envManager.get('HYBRID_MODE');
    // `== null` matches both undefined and null, i.e. "not configured".
    if (rawSetting == null) {
        return true; // Default to true
    }
    return rawSetting.toLowerCase() === 'true';
}
|
||||||
|
|
||||||
/**
 * Derive the vector collection name for a codebase.
 * The name is a mode-dependent prefix followed by the first 8 hex characters of
 * the MD5 digest of the resolved codebase path, so the hybrid and non-hybrid
 * indexes of the same path end up in differently named collections.
 * @param codebasePath Codebase path (resolved with `path.resolve` before hashing)
 * @returns `hybrid_code_chunks_<hash8>` in hybrid mode, `code_chunks_<hash8>` otherwise
 */
private getCollectionName(codebasePath: string): string {
    const absolutePath = path.resolve(codebasePath);
    const shortHash = crypto
        .createHash('md5')
        .update(absolutePath)
        .digest('hex')
        .substring(0, 8);

    if (this.getIsHybrid() === true) {
        return `hybrid_code_chunks_${shortHash}`;
    }
    return `code_chunks_${shortHash}`;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Index a codebase for semantic search
|
||||||
|
* @param codebasePath Codebase root path
|
||||||
* @param progressCallback Optional progress callback function
|
* @param progressCallback Optional progress callback function
|
||||||
* @returns Indexing statistics
|
* @returns Indexing statistics
|
||||||
*/
|
*/
|
||||||
@@ -170,7 +186,9 @@ export class Context {
|
|||||||
codebasePath: string,
|
codebasePath: string,
|
||||||
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void
|
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void
|
||||||
): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
|
): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
|
||||||
console.log(`🚀 Starting to index codebase: ${codebasePath}`);
|
const isHybrid = this.getIsHybrid();
|
||||||
|
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
||||||
|
console.log(`🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
|
||||||
|
|
||||||
// 1. Load ignore patterns from various ignore files
|
// 1. Load ignore patterns from various ignore files
|
||||||
await this.loadGitignorePatterns(codebasePath);
|
await this.loadGitignorePatterns(codebasePath);
|
||||||
@@ -239,7 +257,7 @@ export class Context {
|
|||||||
if (!synchronizer) {
|
if (!synchronizer) {
|
||||||
// Load project-specific ignore patterns before creating FileSynchronizer
|
// Load project-specific ignore patterns before creating FileSynchronizer
|
||||||
await this.loadGitignorePatterns(codebasePath);
|
await this.loadGitignorePatterns(codebasePath);
|
||||||
|
|
||||||
// To be safe, let's initialize if it's not there.
|
// To be safe, let's initialize if it's not there.
|
||||||
const newSynchronizer = new FileSynchronizer(codebasePath, this.ignorePatterns);
|
const newSynchronizer = new FileSynchronizer(codebasePath, this.ignorePatterns);
|
||||||
await newSynchronizer.initialize();
|
await newSynchronizer.initialize();
|
||||||
@@ -317,37 +335,118 @@ export class Context {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Search an indexed codebase.
 *
 * Depending on the hybrid-mode setting (see `getIsHybrid`) this runs either a
 * hybrid search (one dense-vector request plus one sparse/text request, fused
 * with RRF reranking) or a plain dense-vector search against the collection
 * derived from `codebasePath`.
 *
 * @param codebasePath Codebase path to search in
 * @param query Search query
 * @param topK Number of results to return
 * @param threshold Similarity threshold; only forwarded to the non-hybrid vector
 *                  search — the hybrid branch does not apply it
 * @returns Matching chunks, or an empty array when the collection does not exist
 */
async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5): Promise<SemanticSearchResult[]> {
    const isHybrid = this.getIsHybrid();
    const searchType = isHybrid ? 'hybrid search' : 'semantic search';
    console.log(`🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);

    const collectionName = this.getCollectionName(codebasePath);
    console.log(`🔍 Using collection: ${collectionName}`);

    // Bail out early when nothing has been indexed for this codebase/mode.
    const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
    if (!hasCollection) {
        console.log(`⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
        return [];
    }

    // Shared conversion from database hits to the public result shape
    // (both search flavours expose `document` and `score`).
    const toSemanticResults = (
        hits: ReadonlyArray<VectorSearchResult | HybridSearchResult>
    ): SemanticSearchResult[] =>
        hits.map(hit => ({
            content: hit.document.content,
            relativePath: hit.document.relativePath,
            startLine: hit.document.startLine,
            endLine: hit.document.endLine,
            language: hit.document.metadata.language || 'unknown',
            score: hit.score
        }));

    if (isHybrid) {
        // Best-effort probe: a failing query is only logged, the search still proceeds.
        // The probe result is not inspected, so success only means "queryable".
        try {
            await this.vectorDatabase.query(collectionName, '', ['id'], 1);
            console.log(`🔍 Collection '${collectionName}' exists and is queryable`);
        } catch (error) {
            console.log(`⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
        }

        // 1. Generate query vector
        console.log(`🔍 Generating embeddings for query: "${query}"`);
        const queryEmbedding: EmbeddingVector = await this.embedding.embed(query);
        console.log(`✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
        console.log(`🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);

        // 2. Prepare hybrid search requests: dense vector first, raw query text second
        const searchRequests: HybridSearchRequest[] = [
            {
                data: queryEmbedding.vector,
                anns_field: "vector",
                param: { "nprobe": 10 },
                limit: topK
            },
            {
                data: query,
                anns_field: "sparse_vector",
                param: { "drop_ratio_search": 0.2 },
                limit: topK
            }
        ];

        console.log(`🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
        console.log(`🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);

        // 3. Execute hybrid search
        console.log(`🔍 Executing hybrid search with RRF reranking...`);
        const searchResults: HybridSearchResult[] = await this.vectorDatabase.hybridSearch(
            collectionName,
            searchRequests,
            {
                rerank: {
                    strategy: 'rrf',
                    params: { k: 100 }
                },
                limit: topK
            }
        );

        console.log(`🔍 Raw search results count: ${searchResults.length}`);

        // 4. Convert to semantic search result format
        const results = toSemanticResults(searchResults);

        console.log(`✅ Found ${results.length} relevant hybrid results`);
        if (results.length > 0) {
            console.log(`🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
        }

        return results;
    }

    // Regular semantic search
    // 1. Generate query vector
    const queryEmbedding: EmbeddingVector = await this.embedding.embed(query);

    // 2. Search in vector database
    const searchResults: VectorSearchResult[] = await this.vectorDatabase.search(
        collectionName,
        queryEmbedding.vector,
        { topK, threshold }
    );

    // 3. Convert to semantic search result format
    const results = toSemanticResults(searchResults);

    console.log(`✅ Found ${results.length} relevant results`);
    return results;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -458,10 +557,18 @@ export class Context {
|
|||||||
* Prepare vector collection
|
* Prepare vector collection
|
||||||
*/
|
*/
|
||||||
private async prepareCollection(codebasePath: string): Promise<void> {
|
private async prepareCollection(codebasePath: string): Promise<void> {
|
||||||
// Create new collection
|
const isHybrid = this.getIsHybrid();
|
||||||
console.log(`🔧 Preparing vector collection for codebase: ${codebasePath}`);
|
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
|
||||||
|
console.log(`🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}`);
|
||||||
const collectionName = this.getCollectionName(codebasePath);
|
const collectionName = this.getCollectionName(codebasePath);
|
||||||
|
|
||||||
|
// Check if collection already exists
|
||||||
|
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
|
||||||
|
if (collectionExists) {
|
||||||
|
console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// For Ollama embeddings, ensure dimension is detected before creating collection
|
// For Ollama embeddings, ensure dimension is detected before creating collection
|
||||||
if (this.embedding.getProvider() === 'Ollama' && typeof (this.embedding as any).initializeDimension === 'function') {
|
if (this.embedding.getProvider() === 'Ollama' && typeof (this.embedding as any).initializeDimension === 'function') {
|
||||||
await (this.embedding as any).initializeDimension();
|
await (this.embedding as any).initializeDimension();
|
||||||
@@ -469,7 +576,13 @@ export class Context {
|
|||||||
|
|
||||||
const dimension = this.embedding.getDimension();
|
const dimension = this.embedding.getDimension();
|
||||||
const dirName = path.basename(codebasePath);
|
const dirName = path.basename(codebasePath);
|
||||||
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
|
||||||
|
if (isHybrid === true) {
|
||||||
|
await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
|
||||||
|
} else {
|
||||||
|
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
||||||
|
}
|
||||||
|
|
||||||
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -517,6 +630,7 @@ export class Context {
|
|||||||
codebasePath: string,
|
codebasePath: string,
|
||||||
onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
|
onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
|
||||||
): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
|
): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
|
||||||
|
const isHybrid = this.getIsHybrid();
|
||||||
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
||||||
const CHUNK_LIMIT = 450000;
|
const CHUNK_LIMIT = 450000;
|
||||||
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
||||||
@@ -551,8 +665,8 @@ export class Context {
|
|||||||
try {
|
try {
|
||||||
await this.processChunkBuffer(chunkBuffer);
|
await this.processChunkBuffer(chunkBuffer);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// TODO:
|
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
||||||
console.error(`❌ Failed to process chunk batch: ${error}`);
|
console.error(`❌ Failed to process chunk batch for ${searchType}: ${error}`);
|
||||||
} finally {
|
} finally {
|
||||||
chunkBuffer = []; // Always clear buffer, even on failure
|
chunkBuffer = []; // Always clear buffer, even on failure
|
||||||
}
|
}
|
||||||
@@ -580,11 +694,12 @@ export class Context {
|
|||||||
|
|
||||||
// Process any remaining chunks in the buffer
|
// Process any remaining chunks in the buffer
|
||||||
if (chunkBuffer.length > 0) {
|
if (chunkBuffer.length > 0) {
|
||||||
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks`);
|
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
||||||
|
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
|
||||||
try {
|
try {
|
||||||
await this.processChunkBuffer(chunkBuffer);
|
await this.processChunkBuffer(chunkBuffer);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ Failed to process final chunk batch: ${error}`);
|
console.error(`❌ Failed to process final chunk batch for ${searchType}: ${error}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -608,7 +723,9 @@ export class Context {
|
|||||||
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
||||||
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
||||||
|
|
||||||
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens)`);
|
const isHybrid = this.getIsHybrid();
|
||||||
|
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
||||||
|
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
|
||||||
await this.processChunkBatch(chunks, codebasePath);
|
await this.processChunkBatch(chunks, codebasePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -616,45 +733,75 @@ export class Context {
|
|||||||
* Process a batch of chunks
|
* Process a batch of chunks
|
||||||
*/
|
*/
|
||||||
private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> {
    // Embeds every chunk, converts the chunks into vector documents and stores them in the
    // collection for `codebasePath`. The document shape is the same in both modes; only the
    // insert call differs (`insertHybrid` vs `insert`).
    // Throws when a chunk has no `metadata.filePath` or no embedding was returned for it.
    const isHybrid = this.getIsHybrid();

    // Generate embedding vectors (one per chunk, matched by index)
    const chunkContents = chunks.map(chunk => chunk.content);
    const embeddings = await this.embedding.embedBatch(chunkContents);

    // Prepare vector documents
    const documents: VectorDocument[] = chunks.map((chunk, index) => {
        if (!chunk.metadata.filePath) {
            throw new Error(`Missing filePath in chunk metadata at index ${index}`);
        }

        // Fail with a descriptive error instead of a TypeError on `.vector`
        const embedding = embeddings[index];
        if (!embedding) {
            throw new Error(`Missing embedding for chunk at index ${index} (got ${embeddings.length} embeddings for ${chunks.length} chunks)`);
        }

        const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
        const fileExtension = path.extname(chunk.metadata.filePath);

        // filePath/startLine/endLine are stored as dedicated fields, so they are
        // stripped from the free-form metadata here.
        const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;

        return {
            id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
            content: chunk.content, // Full text content for BM25 and storage
            vector: embedding.vector, // Dense vector
            relativePath,
            startLine: chunk.metadata.startLine || 0,
            endLine: chunk.metadata.endLine || 0,
            fileExtension,
            metadata: {
                ...restMetadata,
                codebasePath,
                language: chunk.metadata.language || 'unknown',
                chunkIndex: index
            }
        };
    });

    // Store to vector database
    const collectionName = this.getCollectionName(codebasePath);
    if (isHybrid) {
        await this.vectorDatabase.insertHybrid(collectionName, documents);
    } else {
        await this.vectorDatabase.insert(collectionName, documents);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get programming language based on file extension
|
* Get programming language based on file extension
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ export {
|
|||||||
SearchOptions,
|
SearchOptions,
|
||||||
VectorSearchResult,
|
VectorSearchResult,
|
||||||
VectorDatabase,
|
VectorDatabase,
|
||||||
|
HybridSearchRequest,
|
||||||
|
HybridSearchOptions,
|
||||||
|
HybridSearchResult,
|
||||||
|
RerankStrategy,
|
||||||
COLLECTION_LIMIT_MESSAGE
|
COLLECTION_LIMIT_MESSAGE
|
||||||
} from './types';
|
} from './types';
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,9 @@ import {
|
|||||||
SearchOptions,
|
SearchOptions,
|
||||||
VectorSearchResult,
|
VectorSearchResult,
|
||||||
VectorDatabase,
|
VectorDatabase,
|
||||||
|
HybridSearchRequest,
|
||||||
|
HybridSearchOptions,
|
||||||
|
HybridSearchResult,
|
||||||
COLLECTION_LIMIT_MESSAGE
|
COLLECTION_LIMIT_MESSAGE
|
||||||
} from './types';
|
} from './types';
|
||||||
import { ClusterManager } from './zilliz-utils';
|
import { ClusterManager } from './zilliz-utils';
|
||||||
@@ -467,4 +470,277 @@ export class MilvusRestfulVectorDatabase implements VectorDatabase {
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create a collection for hybrid search, then create its indexes and load it.
 *
 * The schema declares a BM25 function that takes `content` as input and writes
 * `sparse_vector`, alongside the dense `vector` field and the scalar fields
 * (`id`, `relativePath`, `startLine`, `endLine`, `fileExtension`, `metadata`).
 *
 * @param collectionName Name of the collection to create
 * @param dimension Dimension of the dense `vector` field
 * @param description Accepted for interface compatibility; not sent in the request built here
 * @throws Re-throws any error from collection creation, index creation or loading after logging it
 */
async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
    // Same precondition as insertHybrid/hybridSearch before issuing requests.
    await this.ensureInitialized();

    try {
        const restfulConfig = this.config as MilvusRestfulConfig;

        const collectionSchema = {
            collectionName,
            dbName: restfulConfig.database,
            schema: {
                enableDynamicField: false,
                functions: [
                    {
                        name: "content_bm25_emb",
                        description: "content bm25 function",
                        type: "BM25",
                        inputFieldNames: ["content"],
                        outputFieldNames: ["sparse_vector"],
                        params: {},
                    },
                ],
                fields: [
                    {
                        fieldName: "id",
                        dataType: "VarChar",
                        isPrimary: true,
                        elementTypeParams: {
                            max_length: 512
                        }
                    },
                    {
                        fieldName: "content",
                        dataType: "VarChar",
                        elementTypeParams: {
                            max_length: 65535,
                            // The BM25 function reads this field, so the analyzer is enabled on it
                            enable_analyzer: true
                        }
                    },
                    {
                        fieldName: "vector",
                        dataType: "FloatVector",
                        elementTypeParams: {
                            dim: dimension
                        }
                    },
                    {
                        fieldName: "sparse_vector",
                        dataType: "SparseFloatVector"
                    },
                    {
                        fieldName: "relativePath",
                        dataType: "VarChar",
                        elementTypeParams: {
                            max_length: 1024
                        }
                    },
                    {
                        fieldName: "startLine",
                        dataType: "Int64"
                    },
                    {
                        fieldName: "endLine",
                        dataType: "Int64"
                    },
                    {
                        fieldName: "fileExtension",
                        dataType: "VarChar",
                        elementTypeParams: {
                            max_length: 32
                        }
                    },
                    {
                        fieldName: "metadata",
                        dataType: "VarChar",
                        elementTypeParams: {
                            max_length: 65535
                        }
                    }
                ]
            }
        };

        // Step 1: Create collection with schema and functions
        await createCollectionWithLimitCheck(this.makeRequest.bind(this), collectionSchema);

        // Step 2: Create indexes for both vector fields
        await this.createHybridIndexes(collectionName);

        // Step 3: Load collection to memory for searching
        await this.loadCollection(collectionName);

    } catch (error) {
        console.error(`❌ Failed to create hybrid collection '${collectionName}':`, error);
        throw error;
    }
}
|
||||||
|
|
||||||
|
/**
 * Create the indexes a hybrid collection needs, one request per index:
 * first `vector_index` on the dense `vector` field, then `sparse_vector_index`
 * on the `sparse_vector` field.
 *
 * @param collectionName Collection to index
 * @throws Re-throws any request error after logging it
 */
private async createHybridIndexes(collectionName: string): Promise<void> {
    try {
        const { database } = this.config as MilvusRestfulConfig;

        // Order matters only in that it mirrors the request sequence: dense first, sparse second.
        const indexSpecs = [
            {
                fieldName: "vector",
                indexName: "vector_index",
                metricType: "COSINE",
                index_type: "AUTOINDEX"
            },
            {
                fieldName: "sparse_vector",
                indexName: "sparse_vector_index",
                metricType: "BM25",
                index_type: "SPARSE_INVERTED_INDEX"
            }
        ];

        for (const spec of indexSpecs) {
            await this.makeRequest('/indexes/create', 'POST', {
                collectionName,
                dbName: database,
                indexParams: [spec]
            });
        }

    } catch (error) {
        console.error(`❌ Failed to create hybrid indexes for collection '${collectionName}':`, error);
        throw error;
    }
}
|
||||||
|
|
||||||
|
/**
 * Insert documents into a hybrid collection.
 * Each document is sent with its dense `vector` and raw `content`; no sparse
 * vector is supplied by this method. `metadata` is serialized to a JSON string.
 *
 * @param collectionName Target collection
 * @param documents Documents to insert
 * @throws Error when the response code is not 0; any error is logged and re-thrown
 */
async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> {
    await this.ensureInitialized();

    try {
        const { database } = this.config as MilvusRestfulConfig;

        const rows = documents.map(
            ({ id, content, vector, relativePath, startLine, endLine, fileExtension, metadata }) => ({
                id,
                content,
                vector,
                relativePath,
                startLine,
                endLine,
                fileExtension,
                metadata: JSON.stringify(metadata),
            })
        );

        const response = await this.makeRequest('/entities/insert', 'POST', {
            collectionName,
            dbName: database,
            data: rows
        });

        if (response.code === 0) {
            return;
        }
        throw new Error(`Insert failed: ${response.message || 'Unknown error'}`);

    } catch (error) {
        console.error(`❌ Failed to insert hybrid documents to collection '${collectionName}':`, error);
        throw error;
    }
}
|
||||||
|
|
||||||
|
/**
 * Run a hybrid search made of one dense-vector request and one sparse (text) request.
 *
 * `searchRequests[0]` is sent as the dense request (COSINE) and `searchRequests[1]`
 * as the sparse request (BM25); the two result lists are fused by the rerank strategy.
 *
 * @param collectionName Collection to search
 * @param searchRequests Exactly the dense request followed by the sparse request
 * @param options Optional overall `limit` and `rerank` strategy; when `rerank` is
 *                omitted, RRF with `k = 100` is used
 * @returns Search hits with parsed metadata; `vector` / `sparse_vector` are returned empty
 * @throws Error when fewer than two requests are given or the response code is not 0;
 *         any error is logged and re-thrown
 */
async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> {
    await this.ensureInitialized();

    try {
        // Fail clearly instead of with a TypeError on searchRequests[1]
        if (searchRequests.length < 2) {
            throw new Error(`Hybrid search requires a dense and a sparse search request, got ${searchRequests.length}`);
        }
        const denseRequest = searchRequests[0];
        const sparseRequest = searchRequests[1];

        const restfulConfig = this.config as MilvusRestfulConfig;

        console.log(`🔍 Preparing hybrid search for collection: ${collectionName}`);

        // Prepare search requests according to Milvus REST API hybrid search specification
        // For dense vector search - data must be array of vectors: [[0.1, 0.2, 0.3, ...]]
        const search_param_1 = {
            data: Array.isArray(denseRequest.data) ? [denseRequest.data] : [[denseRequest.data]],
            annsField: denseRequest.anns_field, // "vector"
            limit: denseRequest.limit,
            outputFields: ["*"],
            searchParams: {
                metricType: "COSINE",
                params: denseRequest.param || { "nprobe": 10 }
            }
        };

        // For sparse vector search - data must be array of queries: ["query text"]
        const search_param_2 = {
            data: Array.isArray(sparseRequest.data) ? sparseRequest.data : [sparseRequest.data],
            annsField: sparseRequest.anns_field, // "sparse_vector"
            limit: sparseRequest.limit,
            outputFields: ["*"],
            searchParams: {
                metricType: "BM25",
                params: sparseRequest.param || { "drop_ratio_search": 0.2 }
            }
        };

        // Honour the caller's rerank strategy; fall back to RRF (k = 100) when none is given
        const rerank_strategy = options?.rerank ?? {
            strategy: "rrf",
            params: {
                k: 100
            }
        };

        console.log(`🔍 Dense search params:`, JSON.stringify({
            annsField: search_param_1.annsField,
            limit: search_param_1.limit,
            data_length: Array.isArray(search_param_1.data[0]) ? search_param_1.data[0].length : 'N/A',
            searchParams: search_param_1.searchParams
        }, null, 2));

        // Only mark the logged query as truncated when it actually was
        const firstSparseQuery = search_param_2.data[0];
        const queryPreview = typeof firstSparseQuery === 'string'
            ? (firstSparseQuery.length > 50 ? firstSparseQuery.substring(0, 50) + '...' : firstSparseQuery)
            : 'N/A';
        console.log(`🔍 Sparse search params:`, JSON.stringify({
            annsField: search_param_2.annsField,
            limit: search_param_2.limit,
            query_text: queryPreview,
            searchParams: search_param_2.searchParams
        }, null, 2));

        const hybridSearchRequest = {
            collectionName,
            dbName: restfulConfig.database,
            search: [search_param_1, search_param_2],
            rerank: rerank_strategy,
            limit: options?.limit || denseRequest.limit || 10,
            outputFields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'],
        };

        // The vectors themselves are deliberately left out of this log
        console.log(`🔍 Complete REST API request:`, JSON.stringify({
            collectionName: hybridSearchRequest.collectionName,
            dbName: hybridSearchRequest.dbName,
            search_count: hybridSearchRequest.search.length,
            rerank: hybridSearchRequest.rerank,
            limit: hybridSearchRequest.limit,
            outputFields: hybridSearchRequest.outputFields
        }, null, 2));

        console.log(`🔍 Executing REST API hybrid search...`);
        const response = await this.makeRequest('/entities/hybrid_search', 'POST', hybridSearchRequest);

        if (response.code !== 0) {
            throw new Error(`Hybrid search failed: ${response.message || 'Unknown error'}`);
        }

        const results = response.data || [];
        console.log(`✅ Found ${results.length} results from hybrid search`);

        // Transform response to HybridSearchResult format
        return results.map((result: any) => ({
            document: {
                id: result.id,
                content: result.content,
                vector: [], // Vector not returned in search results
                sparse_vector: [], // Vector not returned in search results
                relativePath: result.relativePath,
                startLine: result.startLine,
                endLine: result.endLine,
                fileExtension: result.fileExtension,
                // metadata is stored as a JSON string (see insertHybrid)
                metadata: JSON.parse(result.metadata || '{}'),
            },
            score: result.score || result.distance || 0,
        }));

    } catch (error) {
        console.error(`❌ Failed to perform hybrid search on collection '${collectionName}':`, error);
        throw error;
    }
}
|
||||||
}
|
}
|
||||||
@@ -1,9 +1,12 @@
|
|||||||
import { MilvusClient, DataType, MetricType } from '@zilliz/milvus2-sdk-node';
|
import { MilvusClient, DataType, MetricType, FunctionType } from '@zilliz/milvus2-sdk-node';
|
||||||
import {
|
import {
|
||||||
VectorDocument,
|
VectorDocument,
|
||||||
SearchOptions,
|
SearchOptions,
|
||||||
VectorSearchResult,
|
VectorSearchResult,
|
||||||
VectorDatabase,
|
VectorDatabase,
|
||||||
|
HybridSearchRequest,
|
||||||
|
HybridSearchOptions,
|
||||||
|
HybridSearchResult,
|
||||||
COLLECTION_LIMIT_MESSAGE
|
COLLECTION_LIMIT_MESSAGE
|
||||||
} from './types';
|
} from './types';
|
||||||
import { ClusterManager } from './zilliz-utils';
|
import { ClusterManager } from './zilliz-utils';
|
||||||
@@ -298,4 +301,231 @@ export class MilvusVectorDatabase implements VectorDatabase {
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create a collection laid out for hybrid search: a dense `vector` field plus a
 * `sparse_vector` field that is filled by a BM25 function over `content`.
 *
 * Steps, in order: create the collection (via `createCollectionWithLimitCheck`),
 * index the dense field, index the sparse field, load the collection, then
 * describe it as a final sanity call.
 *
 * @param collectionName Name of the collection to create
 * @param dimension Dimension of the dense `vector` field
 * @param description Optional description; a default mentioning the collection name is used when omitted
 */
async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> {
    await this.ensureInitialized();

    console.log('Beginning hybrid collection creation:', collectionName);
    console.log('Collection dimension:', dimension);

    // Field definitions; `content` has the analyzer enabled because it feeds the BM25 function below.
    const fieldDefinitions = [
        {
            name: 'id',
            description: 'Document ID',
            data_type: DataType.VarChar,
            max_length: 512,
            is_primary_key: true,
        },
        {
            name: 'content',
            description: 'Full text content for BM25 and storage',
            data_type: DataType.VarChar,
            max_length: 65535,
            enable_analyzer: true,
        },
        {
            name: 'vector',
            description: 'Dense vector embedding',
            data_type: DataType.FloatVector,
            dim: dimension,
        },
        {
            name: 'sparse_vector',
            description: 'Sparse vector embedding from BM25',
            data_type: DataType.SparseFloatVector,
        },
        {
            name: 'relativePath',
            description: 'Relative path to the codebase',
            data_type: DataType.VarChar,
            max_length: 1024,
        },
        {
            name: 'startLine',
            description: 'Start line number of the chunk',
            data_type: DataType.Int64,
        },
        {
            name: 'endLine',
            description: 'End line number of the chunk',
            data_type: DataType.Int64,
        },
        {
            name: 'fileExtension',
            description: 'File extension',
            data_type: DataType.VarChar,
            max_length: 32,
        },
        {
            name: 'metadata',
            description: 'Additional document metadata as JSON string',
            data_type: DataType.VarChar,
            max_length: 65535,
        },
    ];

    // BM25 function mapping `content` -> `sparse_vector`.
    const bm25Functions = [
        {
            name: "content_bm25_emb",
            description: "content bm25 function",
            type: FunctionType.BM25,
            input_field_names: ["content"],
            output_field_names: ["sparse_vector"],
            params: {},
        },
    ];

    await createCollectionWithLimitCheck(this.client!, {
        collection_name: collectionName,
        description: description || `Hybrid code context collection: ${collectionName}`,
        fields: fieldDefinitions,
        functions: bm25Functions,
    });

    // One index per vector field, created sequentially: dense first, then sparse.
    const indexDefinitions = [
        {
            collection_name: collectionName,
            field_name: 'vector',
            index_type: 'AUTOINDEX',
            metric_type: MetricType.COSINE,
        },
        {
            collection_name: collectionName,
            field_name: 'sparse_vector',
            index_type: 'SPARSE_INVERTED_INDEX',
            metric_type: MetricType.BM25,
        },
    ];
    for (const indexParams of indexDefinitions) {
        await this.client!.createIndex(indexParams);
    }

    // Load the collection so it can be searched.
    await this.client!.loadCollection({ collection_name: collectionName });

    // Final describe call; the response itself is not inspected.
    await this.client!.describeCollection({ collection_name: collectionName });
}
|
||||||
|
|
||||||
|
/**
 * Insert documents into a hybrid collection.
 *
 * Each document is flattened into a row; `metadata` is serialized to a JSON
 * string. No `sparse_vector` value is sent — presumably it is produced by the
 * collection's BM25 function from `content` (see the schema in
 * `createHybridCollection`).
 *
 * @param collectionName Target collection
 * @param documents Documents to insert
 */
async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> {
    await this.ensureInitialized();

    const rows = documents.map(
        ({ id, content, vector, relativePath, startLine, endLine, fileExtension, metadata }) => ({
            id,
            content,
            vector,
            relativePath,
            startLine,
            endLine,
            fileExtension,
            metadata: JSON.stringify(metadata),
        })
    );

    await this.client!.insert({ collection_name: collectionName, data: rows });
}
|
||||||
|
|
||||||
|
/**
 * Run a hybrid search that combines a dense-vector request and a sparse (BM25)
 * text request, then reranks the merged hits.
 *
 * `searchRequests[0]` is treated as the dense request and `searchRequests[1]`
 * as the sparse request. The rerank strategy comes from `options.rerank`; when
 * it is absent (or is `rrf` without params) RRF with `k = 100` is used.
 *
 * @param collectionName Collection to search
 * @param searchRequests Exactly the dense request followed by the sparse request
 * @param options Optional overall limit and rerank strategy
 * @returns Hits mapped to `HybridSearchResult`; vectors are not returned and are left empty
 * @throws Error when fewer than two search requests are supplied, or when the underlying search fails
 */
async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> {
    await this.ensureInitialized();

    try {
        console.log(`🔍 Preparing hybrid search for collection: ${collectionName}`);

        // Fail with a clear message instead of a TypeError on searchRequests[1].
        if (!Array.isArray(searchRequests) || searchRequests.length < 2) {
            const received = Array.isArray(searchRequests) ? searchRequests.length : 0;
            throw new Error(`Hybrid search requires a dense and a sparse search request, but received ${received}`);
        }
        const [denseRequest, sparseRequest] = searchRequests;

        // Prepare search requests in the correct Milvus format
        const search_param_1 = {
            data: Array.isArray(denseRequest.data) ? denseRequest.data : [denseRequest.data],
            anns_field: denseRequest.anns_field, // "vector"
            param: denseRequest.param, // {"nprobe": 10}
            limit: denseRequest.limit
        };

        const search_param_2 = {
            data: sparseRequest.data, // query text for sparse search
            anns_field: sparseRequest.anns_field, // "sparse_vector"
            param: sparseRequest.param, // {"drop_ratio_search": 0.2}
            limit: sparseRequest.limit
        };

        // Honor the caller's rerank strategy; default to RRF (k = 100).
        const requestedRerank = options?.rerank;
        const rerank_strategy = requestedRerank
            ? {
                strategy: requestedRerank.strategy,
                params: requestedRerank.params ?? (requestedRerank.strategy === 'rrf' ? { k: 100 } : {})
            }
            : {
                strategy: "rrf",
                params: {
                    k: 100
                }
            };

        // Report the vector dimension whether the data is a flat vector or a list of vectors.
        const firstDenseEntry = search_param_1.data[0];
        const denseDataLength = Array.isArray(firstDenseEntry)
            ? firstDenseEntry.length
            : (typeof firstDenseEntry === 'number' ? search_param_1.data.length : 'N/A');

        console.log(`🔍 Dense search params:`, JSON.stringify({
            anns_field: search_param_1.anns_field,
            param: search_param_1.param,
            limit: search_param_1.limit,
            data_length: denseDataLength
        }, null, 2));
        console.log(`🔍 Sparse search params:`, JSON.stringify({
            anns_field: search_param_2.anns_field,
            param: search_param_2.param,
            limit: search_param_2.limit,
            query_text: typeof search_param_2.data === 'string' ? search_param_2.data.substring(0, 50) + '...' : 'N/A'
        }, null, 2));
        console.log(`🔍 Rerank strategy:`, JSON.stringify(rerank_strategy, null, 2));

        // Execute hybrid search using the correct client.search format
        const searchParams = {
            collection_name: collectionName,
            data: [search_param_1, search_param_2],
            limit: options?.limit || denseRequest.limit || 10,
            rerank: rerank_strategy,
            output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'],
        };

        console.log(`🔍 Complete search request:`, JSON.stringify({
            collection_name: searchParams.collection_name,
            data_count: searchParams.data.length,
            limit: searchParams.limit,
            rerank: searchParams.rerank,
            output_fields: searchParams.output_fields
        }, null, 2));

        const searchResult = await this.client!.search(searchParams);

        console.log(`🔍 Search executed, processing results...`);

        if (!searchResult.results || searchResult.results.length === 0) {
            console.log(`⚠️  No results returned from Milvus search`);
            return [];
        }

        console.log(`✅ Found ${searchResult.results.length} results from hybrid search`);

        // Transform results to HybridSearchResult format
        return searchResult.results.map((result: any) => ({
            document: {
                id: result.id,
                content: result.content,
                vector: [], // Vector not requested in output_fields
                sparse_vector: [], // Vector not requested in output_fields
                relativePath: result.relativePath,
                startLine: result.startLine,
                endLine: result.endLine,
                fileExtension: result.fileExtension,
                metadata: JSON.parse(result.metadata || '{}'),
            },
            score: result.score,
        }));

    } catch (error) {
        console.error(`❌ Failed to perform hybrid search on collection '${collectionName}':`, error);
        throw error;
    }
}
|
||||||
}
|
}
|
||||||
@@ -16,11 +16,34 @@ export interface SearchOptions {
|
|||||||
threshold?: number;
|
threshold?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New interfaces for hybrid search
|
||||||
|
/**
 * One sub-request of a hybrid search, aimed at a single vector field.
 */
export interface HybridSearchRequest {
    /** Query payload: a query vector, or query text. */
    data: number[] | string;
    /** Vector field name to search (`vector` or `sparse_vector`). */
    anns_field: string;
    /** Search parameters for this sub-request. */
    param: Record<string, any>;
    /** Maximum number of hits for this sub-request. */
    limit: number;
}
|
||||||
|
|
||||||
|
/**
 * Options that apply to a hybrid search as a whole.
 */
export interface HybridSearchOptions {
    /** How hits from the individual sub-requests are reranked. */
    rerank?: RerankStrategy;
    /** Maximum number of hits to return overall. */
    limit?: number;
}
|
||||||
|
|
||||||
|
/**
 * Reranking configuration for hybrid search.
 */
export interface RerankStrategy {
    /** Reranking strategy identifier. */
    strategy: 'rrf' | 'weighted';
    /** Strategy-specific parameters, for example `{ k: 100 }` with `rrf`. */
    params?: Record<string, any>;
}
|
||||||
|
|
||||||
export interface VectorSearchResult {
|
export interface VectorSearchResult {
|
||||||
document: VectorDocument;
|
document: VectorDocument;
|
||||||
score: number;
|
score: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * A single hit returned by a hybrid search.
 */
export interface HybridSearchResult {
    /** The matched document. */
    document: VectorDocument;
    /** Score reported for this hit. */
    score: number;
}
|
||||||
|
|
||||||
export interface VectorDatabase {
|
export interface VectorDatabase {
|
||||||
/**
|
/**
|
||||||
* Create collection
|
* Create collection
|
||||||
@@ -30,6 +53,14 @@ export interface VectorDatabase {
|
|||||||
*/
|
*/
|
||||||
createCollection(collectionName: string, dimension: number, description?: string): Promise<void>;
|
createCollection(collectionName: string, dimension: number, description?: string): Promise<void>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create collection with hybrid search support
|
||||||
|
* @param collectionName Collection name
|
||||||
|
* @param dimension Dense vector dimension
|
||||||
|
* @param description Collection description
|
||||||
|
*/
|
||||||
|
createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Drop collection
|
* Drop collection
|
||||||
* @param collectionName Collection name
|
* @param collectionName Collection name
|
||||||
@@ -54,6 +85,13 @@ export interface VectorDatabase {
|
|||||||
*/
|
*/
|
||||||
insert(collectionName: string, documents: VectorDocument[]): Promise<void>;
|
insert(collectionName: string, documents: VectorDocument[]): Promise<void>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert hybrid vector documents
|
||||||
|
* @param collectionName Collection name
|
||||||
|
* @param documents Document array
|
||||||
|
*/
|
||||||
|
insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search similar vectors
|
* Search similar vectors
|
||||||
* @param collectionName Collection name
|
* @param collectionName Collection name
|
||||||
@@ -62,6 +100,14 @@ export interface VectorDatabase {
|
|||||||
*/
|
*/
|
||||||
search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]>;
|
search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hybrid search with multiple vector fields
|
||||||
|
* @param collectionName Collection name
|
||||||
|
* @param searchRequests Array of search requests for different fields
|
||||||
|
* @param options Hybrid search options including reranking
|
||||||
|
*/
|
||||||
|
hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete documents
|
* Delete documents
|
||||||
* @param collectionName Collection name
|
* @param collectionName Collection name
|
||||||
@@ -70,11 +116,11 @@ export interface VectorDatabase {
|
|||||||
delete(collectionName: string, ids: string[]): Promise<void>;
|
delete(collectionName: string, ids: string[]): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Query documents by filter
|
* Query documents with filter conditions
|
||||||
* @param collectionName Collection name
|
* @param collectionName Collection name
|
||||||
* @param filter Filter expression string
|
* @param filter Filter expression
|
||||||
* @param outputFields Fields to return
|
* @param outputFields Fields to return
|
||||||
* @param limit Maximum number of results to return (optional)
|
* @param limit Maximum number of results
|
||||||
*/
|
*/
|
||||||
query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]>;
|
query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]>;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -61,8 +61,8 @@ export class ToolHandlers {
|
|||||||
// Check each collection for codebase path
|
// Check each collection for codebase path
|
||||||
for (const collectionName of collections) {
|
for (const collectionName of collections) {
|
||||||
try {
|
try {
|
||||||
// Skip collections that don't match the code_chunks pattern
|
// Skip collections that don't match the code_chunks pattern (support both legacy and new collections)
|
||||||
if (!collectionName.startsWith('code_chunks_')) {
|
if (!collectionName.startsWith('code_chunks_') && !collectionName.startsWith('hybrid_code_chunks_')) {
|
||||||
console.log(`[SYNC-CLOUD] ⏭️ Skipping non-code collection: ${collectionName}`);
|
console.log(`[SYNC-CLOUD] ⏭️ Skipping non-code collection: ${collectionName}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -218,38 +218,19 @@ export class ToolHandlers {
|
|||||||
|
|
||||||
// CRITICAL: Pre-index collection creation validation
|
// CRITICAL: Pre-index collection creation validation
|
||||||
try {
|
try {
|
||||||
const normalizedPath = path.resolve(absolutePath);
|
console.log(`[INDEX-VALIDATION] 🔍 Validating collection creation capability`);
|
||||||
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
||||||
const collectionName = `code_chunks_${hash.substring(0, 8)}`;
|
|
||||||
|
|
||||||
console.log(`[INDEX-VALIDATION] 🔍 Validating collection creation for: ${collectionName}`);
|
// Check if collection can be created (this will be handled entirely by context.ts)
|
||||||
|
const hasExistingIndex = await this.context.hasIndex(absolutePath);
|
||||||
// Get embedding dimension for collection creation
|
if (hasExistingIndex && forceReindex) {
|
||||||
const embeddingProvider = this.context['embedding'];
|
console.log(`[INDEX-VALIDATION] ℹ️ Force reindex enabled, existing index will be cleared`);
|
||||||
const dimension = embeddingProvider.getDimension();
|
await this.context.clearIndex(absolutePath);
|
||||||
|
console.log(`[INDEX-VALIDATION] ✅ Existing index cleared for re-indexing`);
|
||||||
// If force reindex, clear existing collection first
|
} else if (hasExistingIndex) {
|
||||||
if (forceReindex) {
|
console.log(`[INDEX-VALIDATION] ℹ️ Index already exists for this codebase`);
|
||||||
console.log(`[INDEX-VALIDATION] 🧹 Force reindex enabled, clearing existing collection: ${collectionName}`);
|
|
||||||
try {
|
|
||||||
await this.context['vectorDatabase'].dropCollection(collectionName);
|
|
||||||
console.log(`[INDEX-VALIDATION] ✅ Existing collection cleared: ${collectionName}`);
|
|
||||||
} catch (dropError: any) {
|
|
||||||
// Collection might not exist, which is fine
|
|
||||||
console.log(`[INDEX-VALIDATION] ℹ️ Collection ${collectionName} does not exist or already cleared`);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to create collection - this will throw COLLECTION_LIMIT_MESSAGE if limit reached
|
console.log(`[INDEX-VALIDATION] ✅ Collection creation validation completed`);
|
||||||
await this.context['vectorDatabase'].createCollection(
|
|
||||||
collectionName,
|
|
||||||
dimension,
|
|
||||||
`Claude Context collection: ${collectionName}`
|
|
||||||
);
|
|
||||||
|
|
||||||
// If creation succeeds, immediately drop the test collection
|
|
||||||
await this.context['vectorDatabase'].dropCollection(collectionName);
|
|
||||||
console.log(`[INDEX-VALIDATION] ✅ Collection creation validated successfully`);
|
|
||||||
|
|
||||||
} catch (validationError: any) {
|
} catch (validationError: any) {
|
||||||
const errorMessage = typeof validationError === 'string' ? validationError :
|
const errorMessage = typeof validationError === 'string' ? validationError :
|
||||||
@@ -352,14 +333,9 @@ export class ToolHandlers {
|
|||||||
console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`);
|
console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate collection name
|
|
||||||
const normalizedPath = path.resolve(absolutePath);
|
|
||||||
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
||||||
const collectionName = `code_chunks_${hash.substring(0, 8)}`;
|
|
||||||
|
|
||||||
// Load ignore patterns from files first (including .ignore, .gitignore, etc.)
|
// Load ignore patterns from files first (including .ignore, .gitignore, etc.)
|
||||||
await this.context['loadGitignorePatterns'](absolutePath);
|
await this.context['loadGitignorePatterns'](absolutePath);
|
||||||
|
|
||||||
// Initialize file synchronizer with proper ignore patterns (including project-specific patterns)
|
// Initialize file synchronizer with proper ignore patterns (including project-specific patterns)
|
||||||
const { FileSynchronizer } = await import("@zilliz/claude-context-core");
|
const { FileSynchronizer } = await import("@zilliz/claude-context-core");
|
||||||
const ignorePatterns = this.context['ignorePatterns'] || [];
|
const ignorePatterns = this.context['ignorePatterns'] || [];
|
||||||
@@ -367,7 +343,9 @@ export class ToolHandlers {
|
|||||||
const synchronizer = new FileSynchronizer(absolutePath, ignorePatterns);
|
const synchronizer = new FileSynchronizer(absolutePath, ignorePatterns);
|
||||||
await synchronizer.initialize();
|
await synchronizer.initialize();
|
||||||
|
|
||||||
// Store synchronizer in the context's internal map
|
// Store synchronizer in the context (let context manage collection names)
|
||||||
|
await this.context['prepareCollection'](absolutePath);
|
||||||
|
const collectionName = this.context['getCollectionName'](absolutePath);
|
||||||
this.context['synchronizers'].set(collectionName, synchronizer);
|
this.context['synchronizers'].set(collectionName, synchronizer);
|
||||||
if (contextForThisTask !== this.context) {
|
if (contextForThisTask !== this.context) {
|
||||||
contextForThisTask['synchronizers'].set(collectionName, synchronizer);
|
contextForThisTask['synchronizers'].set(collectionName, synchronizer);
|
||||||
@@ -471,7 +449,7 @@ export class ToolHandlers {
|
|||||||
|
|
||||||
// Log embedding provider information before search
|
// Log embedding provider information before search
|
||||||
const embeddingProvider = this.context['embedding'];
|
const embeddingProvider = this.context['embedding'];
|
||||||
console.log(`[SEARCH] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} for semantic search`);
|
console.log(`[SEARCH] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} for search`);
|
||||||
console.log(`[SEARCH] 🔍 Generating embeddings for query using ${embeddingProvider.getProvider()}...`);
|
console.log(`[SEARCH] 🔍 Generating embeddings for query using ${embeddingProvider.getProvider()}...`);
|
||||||
|
|
||||||
// Search in the specified codebase
|
// Search in the specified codebase
|
||||||
@@ -505,7 +483,7 @@ export class ToolHandlers {
|
|||||||
|
|
||||||
return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` +
|
return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` +
|
||||||
` Location: ${location}\n` +
|
` Location: ${location}\n` +
|
||||||
` Score: ${result.score.toFixed(3)}\n` +
|
` Rank: ${index + 1}\n` +
|
||||||
` Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`;
|
` Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`;
|
||||||
}).join('\n');
|
}).join('\n');
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import * as fs from "fs";
|
import * as fs from "fs";
|
||||||
import { Context } from "@zilliz/claude-context-core";
|
import { Context, FileSynchronizer } from "@zilliz/claude-context-core";
|
||||||
import { SnapshotManager } from "./snapshot.js";
|
import { SnapshotManager } from "./snapshot.js";
|
||||||
|
|
||||||
export class SyncManager {
|
export class SyncManager {
|
||||||
@@ -79,6 +79,11 @@ export class SyncManager {
|
|||||||
console.error(`[SYNC-DEBUG] Error syncing codebase '${codebasePath}' after ${codebaseElapsed}ms:`, error);
|
console.error(`[SYNC-DEBUG] Error syncing codebase '${codebasePath}' after ${codebaseElapsed}ms:`, error);
|
||||||
console.error(`[SYNC-DEBUG] Error stack:`, error.stack);
|
console.error(`[SYNC-DEBUG] Error stack:`, error.stack);
|
||||||
|
|
||||||
|
if (error.message.includes('Failed to query Milvus')) {
|
||||||
|
// Collection maybe deleted manually, delete the snapshot file
|
||||||
|
await FileSynchronizer.deleteSnapshot(codebasePath);
|
||||||
|
}
|
||||||
|
|
||||||
// Log additional error details
|
// Log additional error details
|
||||||
if (error.code) {
|
if (error.code) {
|
||||||
console.error(`[SYNC-DEBUG] Error code: ${error.code}`);
|
console.error(`[SYNC-DEBUG] Error code: ${error.code}`);
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import * as vscode from 'vscode';
|
import * as vscode from 'vscode';
|
||||||
import { Context } from '@zilliz/claude-context-core';
|
import { Context } from '@zilliz/claude-context-core';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import * as crypto from 'crypto';
|
|
||||||
|
|
||||||
export class IndexCommand {
|
export class IndexCommand {
|
||||||
private context: Context;
|
private context: Context;
|
||||||
@@ -78,10 +77,9 @@ export class IndexCommand {
|
|||||||
const { FileSynchronizer } = await import("@zilliz/claude-context-core");
|
const { FileSynchronizer } = await import("@zilliz/claude-context-core");
|
||||||
const synchronizer = new FileSynchronizer(selectedFolder.uri.fsPath, this.context['ignorePatterns'] || []);
|
const synchronizer = new FileSynchronizer(selectedFolder.uri.fsPath, this.context['ignorePatterns'] || []);
|
||||||
await synchronizer.initialize();
|
await synchronizer.initialize();
|
||||||
// Store synchronizer in the context's internal map using the same collection name generation logic
|
// Store synchronizer in the context's internal map using the collection name from context
|
||||||
const normalizedPath = path.resolve(selectedFolder.uri.fsPath);
|
await this.context['prepareCollection'](selectedFolder.uri.fsPath);
|
||||||
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
const collectionName = this.context['getCollectionName'](selectedFolder.uri.fsPath);
|
||||||
const collectionName = `code_chunks_${hash.substring(0, 8)}`;
|
|
||||||
this.context['synchronizers'].set(collectionName, synchronizer);
|
this.context['synchronizers'].set(collectionName, synchronizer);
|
||||||
|
|
||||||
// Start indexing with progress callback
|
// Start indexing with progress callback
|
||||||
|
|||||||
@@ -52,13 +52,25 @@ export class SearchCommand {
|
|||||||
}
|
}
|
||||||
const codebasePath = workspaceFolders[0].uri.fsPath;
|
const codebasePath = workspaceFolders[0].uri.fsPath;
|
||||||
|
|
||||||
// Use the new semantic search service
|
// Check if index exists
|
||||||
|
progress.report({ increment: 20, message: 'Checking index...' });
|
||||||
|
const hasIndex = await this.context.hasIndex(codebasePath);
|
||||||
|
|
||||||
|
if (!hasIndex) {
|
||||||
|
vscode.window.showErrorMessage('Index not found. Please index the codebase first.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use semantic search
|
||||||
const query: SearchQuery = {
|
const query: SearchQuery = {
|
||||||
term: searchTerm,
|
term: searchTerm,
|
||||||
includeContent: true,
|
includeContent: true,
|
||||||
limit: 20
|
limit: 20
|
||||||
};
|
};
|
||||||
|
|
||||||
|
console.log('🔍 Using semantic search...');
|
||||||
|
progress.report({ increment: 50, message: 'Executing semantic search...' });
|
||||||
|
|
||||||
const results = await this.context.semanticSearch(
|
const results = await this.context.semanticSearch(
|
||||||
codebasePath,
|
codebasePath,
|
||||||
query.term,
|
query.term,
|
||||||
@@ -66,7 +78,7 @@ export class SearchCommand {
|
|||||||
0.3 // similarity threshold
|
0.3 // similarity threshold
|
||||||
);
|
);
|
||||||
|
|
||||||
progress.report({ increment: 100, message: 'Semantic search complete!' });
|
progress.report({ increment: 100, message: 'Search complete!' });
|
||||||
|
|
||||||
if (results.length === 0) {
|
if (results.length === 0) {
|
||||||
vscode.window.showInformationMessage(`No results found for "${searchTerm}"`);
|
vscode.window.showInformationMessage(`No results found for "${searchTerm}"`);
|
||||||
@@ -77,7 +89,7 @@ export class SearchCommand {
|
|||||||
const quickPickItems = this.generateQuickPickItems(results, searchTerm, codebasePath);
|
const quickPickItems = this.generateQuickPickItems(results, searchTerm, codebasePath);
|
||||||
|
|
||||||
const selected = await vscode.window.showQuickPick(quickPickItems, {
|
const selected = await vscode.window.showQuickPick(quickPickItems, {
|
||||||
placeHolder: `Found ${results.length} results for "${searchTerm}"`,
|
placeHolder: `Found ${results.length} results for "${searchTerm}" using semantic search`,
|
||||||
matchOnDescription: true,
|
matchOnDescription: true,
|
||||||
matchOnDetail: true
|
matchOnDetail: true
|
||||||
});
|
});
|
||||||
@@ -88,8 +100,8 @@ export class SearchCommand {
|
|||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Semantic search failed:', error);
|
console.error('Search failed:', error);
|
||||||
vscode.window.showErrorMessage(`Semantic search failed: ${error}`);
|
vscode.window.showErrorMessage(`Search failed: ${error}. Please ensure the codebase is indexed.`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,7 +147,13 @@ export class SearchCommand {
|
|||||||
}
|
}
|
||||||
const codebasePath = workspaceFolders[0].uri.fsPath;
|
const codebasePath = workspaceFolders[0].uri.fsPath;
|
||||||
|
|
||||||
// Use the semantic search service
|
// Check if index exists
|
||||||
|
const hasIndex = await this.context.hasIndex(codebasePath);
|
||||||
|
if (!hasIndex) {
|
||||||
|
throw new Error('Index not found. Please index the codebase first.');
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('🔍 Using semantic search for webview...');
|
||||||
return await this.context.semanticSearch(
|
return await this.context.semanticSearch(
|
||||||
codebasePath,
|
codebasePath,
|
||||||
searchTerm,
|
searchTerm,
|
||||||
@@ -148,23 +166,31 @@ export class SearchCommand {
|
|||||||
* Check if index exists for the given codebase path
|
* Check if index exists for the given codebase path
|
||||||
*/
|
*/
|
||||||
async hasIndex(codebasePath: string): Promise<boolean> {
|
async hasIndex(codebasePath: string): Promise<boolean> {
|
||||||
return await this.context.hasIndex(codebasePath);
|
try {
|
||||||
|
return await this.context.hasIndex(codebasePath);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error checking index existence:', error);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate quick pick items for VS Code
|
* Generate quick pick items for VS Code
|
||||||
*/
|
*/
|
||||||
private generateQuickPickItems(results: SemanticSearchResult[], searchTerm: string, workspaceRoot?: string) {
|
private generateQuickPickItems(results: SemanticSearchResult[], searchTerm: string, workspaceRoot?: string) {
|
||||||
return results.slice(0, 20).map(result => {
|
return results.slice(0, 20).map((result, index) => {
|
||||||
let displayPath = result.relativePath;
|
let displayPath = result.relativePath;
|
||||||
// Truncate content for display
|
// Truncate content for display
|
||||||
const truncatedContent = result.content.length <= 150
|
const truncatedContent = result.content.length <= 150
|
||||||
? result.content
|
? result.content
|
||||||
: result.content.substring(0, 150) + '...';
|
: result.content.substring(0, 150) + '...';
|
||||||
|
|
||||||
|
// Add rank info to description
|
||||||
|
const rankText = ` (rank: ${index + 1})`;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
label: `$(file-code) ${displayPath}`,
|
label: `$(file-code) ${displayPath}`,
|
||||||
description: `1 match in ${displayPath}`,
|
description: `$(search) semantic search${rankText}`,
|
||||||
detail: truncatedContent,
|
detail: truncatedContent,
|
||||||
result: result
|
result: result
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -197,7 +197,7 @@ class SemanticSearchController {
|
|||||||
this.resultsList.innerHTML = '<div class="no-results">No matches found</div>';
|
this.resultsList.innerHTML = '<div class="no-results">No matches found</div>';
|
||||||
} else {
|
} else {
|
||||||
this.resultsHeader.textContent = `${results.length} result${results.length === 1 ? '' : 's'} for "${query}"`;
|
this.resultsHeader.textContent = `${results.length} result${results.length === 1 ? '' : 's'} for "${query}"`;
|
||||||
this.resultsList.innerHTML = results.map(result => this.createResultHTML(result)).join('');
|
this.resultsList.innerHTML = results.map((result, index) => this.createResultHTML(result, index + 1)).join('');
|
||||||
}
|
}
|
||||||
this.resultsContainer.style.display = 'block';
|
this.resultsContainer.style.display = 'block';
|
||||||
}
|
}
|
||||||
@@ -205,9 +205,10 @@ class SemanticSearchController {
|
|||||||
/**
|
/**
|
||||||
* Create HTML for a single result item
|
* Create HTML for a single result item
|
||||||
* @param {Object} result - Result object
|
* @param {Object} result - Result object
|
||||||
|
* @param {number} rank - Result rank (1-indexed)
|
||||||
* @returns {string} HTML string
|
* @returns {string} HTML string
|
||||||
*/
|
*/
|
||||||
createResultHTML(result) {
|
createResultHTML(result, rank) {
|
||||||
return `
|
return `
|
||||||
<div class="result-item" onclick="searchController.openFile('${result.relativePath}', ${result.line}, ${result.startLine}, ${result.endLine})">
|
<div class="result-item" onclick="searchController.openFile('${result.relativePath}', ${result.line}, ${result.startLine}, ${result.endLine})">
|
||||||
<div class="result-file">
|
<div class="result-file">
|
||||||
@@ -216,7 +217,7 @@ class SemanticSearchController {
|
|||||||
</div>
|
</div>
|
||||||
<div class="result-preview">${result.preview}</div>
|
<div class="result-preview">${result.preview}</div>
|
||||||
<div class="result-context">${result.context}</div>
|
<div class="result-context">${result.context}</div>
|
||||||
${result.score ? `<div class="result-score" style="margin-top: 8px; text-align: right;">Similarity: ${(result.score * 100).toFixed(1)}%</div>` : ''}
|
<div class="result-rank" style="margin-top: 8px; text-align: right;">Rank: ${rank}</div>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ body {
|
|||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-score {
|
.result-rank {
|
||||||
font-size: 10px;
|
font-size: 10px;
|
||||||
color: var(--vscode-descriptionForeground);
|
color: var(--vscode-descriptionForeground);
|
||||||
background-color: var(--vscode-badge-background);
|
background-color: var(--vscode-badge-background);
|
||||||
|
|||||||
Reference in New Issue
Block a user