From caa1a5e8d82776a01a0c3fbde6dfa0cdf00a0038 Mon Sep 17 00:00:00 2001 From: Cheney Zhang Date: Wed, 6 Aug 2025 17:52:59 +0800 Subject: [PATCH] add tool get indexing status (#125) * add tool get indexing status Signed-off-by: ChengZi * remove redundant description Signed-off-by: ChengZi --------- Signed-off-by: ChengZi --- packages/mcp/src/config.ts | 2 +- packages/mcp/src/handlers.ts | 93 +++++++++++++++++++++++++- packages/mcp/src/index.ts | 32 +++++---- packages/mcp/src/snapshot.ts | 122 ++++++++++++++++++++++++++++++----- 4 files changed, 219 insertions(+), 30 deletions(-) diff --git a/packages/mcp/src/config.ts b/packages/mcp/src/config.ts index ee4fdfc..0740543 100644 --- a/packages/mcp/src/config.ts +++ b/packages/mcp/src/config.ts @@ -21,7 +21,7 @@ export interface ContextMcpConfig { export interface CodebaseSnapshot { indexedCodebases: string[]; - indexingCodebases: string[]; // List of codebases currently being indexed + indexingCodebases: string[] | Record<string, number>; // Array (legacy) or Map of codebase path to progress percentage lastUpdated: string; } diff --git a/packages/mcp/src/handlers.ts b/packages/mcp/src/handlers.ts index 8581455..658064e 100644 --- a/packages/mcp/src/handlers.ts +++ b/packages/mcp/src/handlers.ts @@ -318,6 +318,7 @@ export class ToolHandlers { private async startBackgroundIndexing(codebasePath: string, forceReindex: boolean, splitterType: string) { const absolutePath = codebasePath; + let lastSaveTime = 0; // Track last save timestamp try { console.log(`[BACKGROUND-INDEX] Starting background indexing for: ${absolutePath}`); @@ -357,9 +358,22 @@ export class ToolHandlers { const embeddingProvider = this.context.getEmbedding(); console.log(`[BACKGROUND-INDEX] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} with dimension: ${embeddingProvider.getDimension()}`); - // Start indexing with the appropriate context + // Start indexing with the appropriate context and progress tracking console.log(`[BACKGROUND-INDEX] 🚀 Beginning
codebase indexing process...`); - const stats = await contextForThisTask.indexCodebase(absolutePath); + const stats = await contextForThisTask.indexCodebase(absolutePath, (progress) => { + // Update progress in snapshot manager + this.snapshotManager.updateIndexingProgress(absolutePath, progress.percentage); + + // Save snapshot periodically (every 2 seconds to avoid too frequent saves) + const currentTime = Date.now(); + if (currentTime - lastSaveTime >= 2000) { // 2 seconds = 2000ms + this.snapshotManager.saveCodebaseSnapshot(); + lastSaveTime = currentTime; + console.log(`[BACKGROUND-INDEX] 💾 Saved progress snapshot at ${progress.percentage.toFixed(1)}%`); + } + + console.log(`[BACKGROUND-INDEX] Progress: ${progress.phase} - ${progress.percentage}% (${progress.current}/${progress.total})`); + }); console.log(`[BACKGROUND-INDEX] ✅ Indexing completed successfully! Files: ${stats.indexedFiles}, Chunks: ${stats.totalChunks}`); // Move from indexing to indexed list @@ -645,4 +659,79 @@ export class ToolHandlers { }; } } + + public async handleGetIndexingStatus(args: any) { + const { path: codebasePath } = args; + + try { + // Force absolute path resolution + const absolutePath = ensureAbsolutePath(codebasePath); + + // Validate path exists + if (!fs.existsSync(absolutePath)) { + return { + content: [{ + type: "text", + text: `Error: Path '${absolutePath}' does not exist. 
Original input: '${codebasePath}'` + }], + isError: true + }; + } + + // Check if it's a directory + const stat = fs.statSync(absolutePath); + if (!stat.isDirectory()) { + return { + content: [{ + type: "text", + text: `Error: Path '${absolutePath}' is not a directory` + }], + isError: true + }; + } + + // Check indexing status + const isIndexed = this.snapshotManager.getIndexedCodebases().includes(absolutePath); + const isIndexing = this.snapshotManager.getIndexingCodebases().includes(absolutePath); + const indexingProgress = this.snapshotManager.getIndexingProgress(absolutePath); + + let statusMessage = ''; + + if (isIndexed) { + statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`; + } else if (isIndexing) { + const progressPercentage = indexingProgress !== undefined ? indexingProgress : 0; + statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed. Progress: ${progressPercentage.toFixed(1)}%`; + + // Add more detailed status based on progress + if (progressPercentage < 10) { + statusMessage += ' (Preparing and scanning files...)'; + } else if (progressPercentage < 100) { + statusMessage += ' (Processing files and generating embeddings...)'; + } + } else { + statusMessage = `❌ Codebase '${absolutePath}' is not indexed. Please use the index_codebase tool to index it first.`; + } + + const pathInfo = codebasePath !== absolutePath + ? 
`\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'` + : ''; + + return { + content: [{ + type: "text", + text: statusMessage + pathInfo + }] + }; + + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error getting indexing status: ${error.message || error}` + }], + isError: true + }; + } + } } \ No newline at end of file diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 9f673b4..ac36c05 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -83,17 +83,11 @@ class ContextMcpServer { } private setupTools() { - // Get current working directory to provide to LLM - const currentWorkingDirectory = process.cwd(); - const index_description = ` Index a codebase directory to enable semantic search using a configurable code splitter. ⚠️ **IMPORTANT**: - You MUST provide an absolute path to the target codebase. -- Relative paths will be automatically resolved to absolute paths. -- Current working directory: ${currentWorkingDirectory}. - You MUST use this directly and DO NOT append any subfolder. ✨ **Usage Guidance**: - This tool is typically used when search fails due to an unindexed codebase. @@ -106,8 +100,6 @@ Search the indexed codebase using natural language queries within a specified ab ⚠️ **IMPORTANT**: - You MUST provide an absolute path. -- The current working directory is: ${currentWorkingDirectory}. -- You MUST use this as the default path and DO NOT append any subfolder. ✨ **Usage Guidance**: - If the codebase is not indexed, this tool will return a clear error message indicating that indexing is required first. @@ -126,7 +118,7 @@ Search the indexed codebase using natural language queries within a specified ab properties: { path: { type: "string", - description: `ABSOLUTE path to the codebase directory to index. Current working directory is: ${currentWorkingDirectory}. 
You can use this path directly or adjust as needed.` + description: `ABSOLUTE path to the codebase directory to index.` }, force: { type: "boolean", @@ -167,7 +159,7 @@ Search the indexed codebase using natural language queries within a specified ab properties: { path: { type: "string", - description: `ABSOLUTE path to the codebase directory to search in. Current working directory is: ${currentWorkingDirectory}. You can use this path directly or adjust as needed.` + description: `ABSOLUTE path to the codebase directory to search in.` }, query: { type: "string", @@ -185,13 +177,27 @@ Search the indexed codebase using natural language queries within a specified ab }, { name: "clear_index", - description: `Clear the search index. IMPORTANT: You MUST provide an absolute path. Current working directory is: ${currentWorkingDirectory}. You can use this as the default path or adjust as needed (e.g., ${currentWorkingDirectory}/subfolder).`, + description: `Clear the search index. IMPORTANT: You MUST provide an absolute path.`, inputSchema: { type: "object", properties: { path: { type: "string", - description: `ABSOLUTE path to the codebase directory to clear. Current working directory is: ${currentWorkingDirectory}. You can use this path directly or adjust as needed.` + description: `ABSOLUTE path to the codebase directory to clear.` + } + }, + required: ["path"] + } + }, + { + name: "get_indexing_status", + description: `Get the current indexing status of a codebase. 
Shows progress percentage for actively indexing codebases and completion status for indexed codebases.`, + inputSchema: { + type: "object", + properties: { + path: { + type: "string", + description: `ABSOLUTE path to the codebase directory to check status for.` } }, required: ["path"] @@ -212,6 +218,8 @@ Search the indexed codebase using natural language queries within a specified ab return await this.toolHandlers.handleSearchCode(args); case "clear_index": return await this.toolHandlers.handleClearIndex(args); + case "get_indexing_status": + return await this.toolHandlers.handleGetIndexingStatus(args); default: throw new Error(`Unknown tool: ${name}`); diff --git a/packages/mcp/src/snapshot.ts b/packages/mcp/src/snapshot.ts index 67b3da7..8ff7b38 100644 --- a/packages/mcp/src/snapshot.ts +++ b/packages/mcp/src/snapshot.ts @@ -6,7 +6,7 @@ import { CodebaseSnapshot } from "./config.js"; export class SnapshotManager { private snapshotFilePath: string; private indexedCodebases: string[] = []; - private indexingCodebases: string[] = []; + private indexingCodebases: Map<string, number> = new Map(); // Map of codebase path to progress percentage constructor() { // Initialize snapshot file path @@ -14,21 +14,93 @@ export class SnapshotManager { } public getIndexedCodebases(): string[] { - return [...this.indexedCodebases]; + // Read from JSON file to ensure consistency and persistence + try { + if (!fs.existsSync(this.snapshotFilePath)) { + return []; + } + + const snapshotData = fs.readFileSync(this.snapshotFilePath, 'utf8'); + const snapshot: CodebaseSnapshot = JSON.parse(snapshotData); + + return snapshot.indexedCodebases || []; + } catch (error) { + console.warn(`[SNAPSHOT-DEBUG] Error reading indexed codebases from file:`, error); + // Fallback to memory if file reading fails + return [...this.indexedCodebases]; + } } public getIndexingCodebases(): string[] { - return [...this.indexingCodebases]; + // Read from JSON file to ensure consistency and persistence + try { + if
(!fs.existsSync(this.snapshotFilePath)) { + return []; + } + + const snapshotData = fs.readFileSync(this.snapshotFilePath, 'utf8'); + const snapshot: CodebaseSnapshot = JSON.parse(snapshotData); + + // Handle both legacy array format and new object format + if (Array.isArray(snapshot.indexingCodebases)) { + // Legacy format: return the array directly + return snapshot.indexingCodebases; + } else if (snapshot.indexingCodebases && typeof snapshot.indexingCodebases === 'object') { + // New format: return the keys of the object + return Object.keys(snapshot.indexingCodebases); + } + + return []; + } catch (error) { + console.warn(`[SNAPSHOT-DEBUG] Error reading indexing codebases from file:`, error); + // Fallback to memory if file reading fails + return Array.from(this.indexingCodebases.keys()); + } } - public addIndexingCodebase(codebasePath: string): void { - if (!this.indexingCodebases.includes(codebasePath)) { - this.indexingCodebases.push(codebasePath); + public getIndexingCodebasesWithProgress(): Map<string, number> { + return new Map(this.indexingCodebases); + } + + public getIndexingProgress(codebasePath: string): number | undefined { + // Read from JSON file to ensure consistency and persistence + try { + if (!fs.existsSync(this.snapshotFilePath)) { + return undefined; + } + + const snapshotData = fs.readFileSync(this.snapshotFilePath, 'utf8'); + const snapshot: CodebaseSnapshot = JSON.parse(snapshotData); + + // Handle both legacy array format and new object format + if (Array.isArray(snapshot.indexingCodebases)) { + // Legacy format: if path exists in array, assume 0% progress + return snapshot.indexingCodebases.includes(codebasePath) ?
0 : undefined; + } else if (snapshot.indexingCodebases && typeof snapshot.indexingCodebases === 'object') { + // New format: return the actual progress percentage + return snapshot.indexingCodebases[codebasePath]; + } + + return undefined; + } catch (error) { + console.warn(`[SNAPSHOT-DEBUG] Error reading progress from file for ${codebasePath}:`, error); + // Fallback to memory if file reading fails + return this.indexingCodebases.get(codebasePath); + } + } + + public addIndexingCodebase(codebasePath: string, progress: number = 0): void { + this.indexingCodebases.set(codebasePath, progress); + } + + public updateIndexingProgress(codebasePath: string, progress: number): void { + if (this.indexingCodebases.has(codebasePath)) { + this.indexingCodebases.set(codebasePath, progress); } } public removeIndexingCodebase(codebasePath: string): void { - this.indexingCodebases = this.indexingCodebases.filter(path => path !== codebasePath); + this.indexingCodebases.delete(codebasePath); } public addIndexedCodebase(codebasePath: string): void { @@ -72,8 +144,19 @@ export class SnapshotManager { } // Handle indexing codebases - treat them as not indexed since they were interrupted - const validIndexingCodebases: string[] = []; - for (const codebasePath of snapshot.indexingCodebases || []) { + // Support both legacy array format and new object format + let indexingCodebasesList: string[] = []; + if (Array.isArray(snapshot.indexingCodebases)) { + // Legacy format: string[] + indexingCodebasesList = snapshot.indexingCodebases; + console.log(`[SNAPSHOT-DEBUG] Found legacy indexingCodebases array format with ${indexingCodebasesList.length} entries`); + } else if (snapshot.indexingCodebases && typeof snapshot.indexingCodebases === 'object') { + // New format: Record<string, number> + indexingCodebasesList = Object.keys(snapshot.indexingCodebases); + console.log(`[SNAPSHOT-DEBUG] Found new indexingCodebases object format with ${indexingCodebasesList.length} entries`); + } + + for (const codebasePath of
indexingCodebasesList) { if (fs.existsSync(codebasePath)) { console.warn(`[SNAPSHOT-DEBUG] Found interrupted indexing codebase: ${codebasePath}. Treating as not indexed.`); // Don't add to validIndexingCodebases - treat as not indexed @@ -84,14 +167,17 @@ export class SnapshotManager { // Restore state - only fully indexed codebases this.indexedCodebases = validCodebases; - this.indexingCodebases = []; // Reset indexing codebases since they were interrupted + this.indexingCodebases = new Map(); // Reset indexing codebases since they were interrupted console.log(`[SNAPSHOT-DEBUG] Restored ${validCodebases.length} fully indexed codebases.`); - console.log(`[SNAPSHOT-DEBUG] Reset ${snapshot.indexingCodebases?.length || 0} interrupted indexing codebases.`); + console.log(`[SNAPSHOT-DEBUG] Reset ${indexingCodebasesList.length} interrupted indexing codebases.`); // Save updated snapshot if we removed any invalid paths or reset indexing codebases - if (validCodebases.length !== snapshot.indexedCodebases.length || - (snapshot.indexingCodebases && snapshot.indexingCodebases.length > 0)) { + const originalIndexingCount = Array.isArray(snapshot.indexingCodebases) + ? 
snapshot.indexingCodebases.length + : Object.keys(snapshot.indexingCodebases || {}).length; + + if (validCodebases.length !== snapshot.indexedCodebases.length || originalIndexingCount > 0) { this.saveCodebaseSnapshot(); } @@ -112,14 +198,20 @@ export class SnapshotManager { console.log('[SNAPSHOT-DEBUG] Created snapshot directory:', snapshotDir); } + // Convert Map to object for JSON serialization + const indexingCodebasesObject: Record<string, number> = {}; + this.indexingCodebases.forEach((progress, path) => { + indexingCodebasesObject[path] = progress; + }); + const snapshot: CodebaseSnapshot = { indexedCodebases: this.indexedCodebases, - indexingCodebases: this.indexingCodebases, + indexingCodebases: indexingCodebasesObject, lastUpdated: new Date().toISOString() }; fs.writeFileSync(this.snapshotFilePath, JSON.stringify(snapshot, null, 2)); - console.log('[SNAPSHOT-DEBUG] Snapshot saved successfully. Indexed codebases:', this.indexedCodebases.length, 'Indexing codebases:', this.indexingCodebases.size); } catch (error: any) { console.error('[SNAPSHOT-DEBUG] Error saving snapshot:', error);