mirror of
https://github.com/zilliztech/claude-context.git
synced 2025-10-06 01:10:02 +03:00
[Refactor]: Refactor the file synchronizer to use Merkle DAG
Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com>
This commit is contained in:
@@ -351,7 +351,10 @@ export class CodeContext {
|
||||
updateIgnorePatterns(ignorePatterns: string[]): void {
|
||||
// Merge with default patterns, avoiding duplicates
|
||||
const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
|
||||
this.ignorePatterns = [...new Set(mergedPatterns)]; // Remove duplicates
|
||||
const uniquePatterns: string[] = [];
|
||||
const patternSet = new Set(mergedPatterns);
|
||||
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
||||
this.ignorePatterns = uniquePatterns;
|
||||
console.log(`🚫 Updated ignore patterns: ${ignorePatterns.length} from .gitignore + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ export class AstCodeSplitter implements Splitter {
|
||||
if (chunkSize) this.chunkSize = chunkSize;
|
||||
if (chunkOverlap) this.chunkOverlap = chunkOverlap;
|
||||
this.parser = new Parser();
|
||||
|
||||
|
||||
// Initialize fallback splitter
|
||||
const { LangChainCodeSplitter } = require('./langchain-splitter');
|
||||
this.langchainFallback = new LangChainCodeSplitter(chunkSize, chunkOverlap);
|
||||
@@ -47,7 +47,7 @@ export class AstCodeSplitter implements Splitter {
|
||||
|
||||
try {
|
||||
console.log(`🌳 Using AST splitter for ${language} file: ${filePath || 'unknown'}`);
|
||||
|
||||
|
||||
this.parser.setLanguage(langConfig.parser);
|
||||
const tree = this.parser.parse(code);
|
||||
|
||||
@@ -58,7 +58,7 @@ export class AstCodeSplitter implements Splitter {
|
||||
|
||||
// Extract chunks based on AST nodes
|
||||
const chunks = this.extractChunks(tree.rootNode, code, langConfig.nodeTypes, language, filePath);
|
||||
|
||||
|
||||
// If chunks are too large, split them further
|
||||
const refinedChunks = await this.refineChunks(chunks, code);
|
||||
|
||||
@@ -100,8 +100,8 @@ export class AstCodeSplitter implements Splitter {
|
||||
}
|
||||
|
||||
private extractChunks(
|
||||
node: Parser.SyntaxNode,
|
||||
code: string,
|
||||
node: Parser.SyntaxNode,
|
||||
code: string,
|
||||
splittableTypes: string[],
|
||||
language: string,
|
||||
filePath?: string
|
||||
@@ -255,7 +255,7 @@ export class AstCodeSplitter implements Splitter {
|
||||
*/
|
||||
/**
 * Check whether the AST splitter has a tree-sitter grammar configured for
 * the given language identifier. Comparison is case-insensitive and accepts
 * common aliases (e.g. 'ts' for 'typescript').
 *
 * @param language Language name or alias to check.
 * @returns true when an AST-based split is available for the language.
 */
static isLanguageSupported(language: string): boolean {
    // (Fix: the supported-language list contained a duplicated line.)
    const supportedLanguages = [
        'javascript', 'js', 'typescript', 'ts', 'python', 'py',
        'java', 'cpp', 'c++', 'c', 'go', 'rust', 'rs'
    ];
    return supportedLanguages.includes(language.toLowerCase());
}
|
||||
|
||||
@@ -1,93 +1,99 @@
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
export class MerkleNode {
|
||||
export interface MerkleDAGNode {
|
||||
id: string;
|
||||
hash: string;
|
||||
left: MerkleNode | null;
|
||||
right: MerkleNode | null;
|
||||
|
||||
constructor(hash: string, left: MerkleNode | null = null, right: MerkleNode | null = null) {
|
||||
this.hash = hash;
|
||||
this.left = left;
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
static serializeNode(node: MerkleNode | null): any {
|
||||
if (!node) return null;
|
||||
return {
|
||||
hash: node.hash,
|
||||
left: MerkleNode.serializeNode(node.left),
|
||||
right: MerkleNode.serializeNode(node.right)
|
||||
};
|
||||
}
|
||||
|
||||
static deserializeNode(data: any): MerkleNode | null {
|
||||
if (!data) return null;
|
||||
return new MerkleNode(
|
||||
data.hash,
|
||||
MerkleNode.deserializeNode(data.left),
|
||||
MerkleNode.deserializeNode(data.right)
|
||||
);
|
||||
}
|
||||
data: string;
|
||||
parents: string[];
|
||||
children: string[];
|
||||
}
|
||||
|
||||
export class MerkleTree {
|
||||
root: MerkleNode;
|
||||
leaves: MerkleNode[];
|
||||
export class MerkleDAG {
|
||||
nodes: Map<string, MerkleDAGNode>;
|
||||
rootIds: string[];
|
||||
|
||||
constructor(data: string[]) {
|
||||
const leaves = data.map(d => new MerkleNode(this.hash(d)));
|
||||
this.leaves = leaves;
|
||||
this.root = this.buildTree(leaves);
|
||||
constructor() {
|
||||
this.nodes = new Map();
|
||||
this.rootIds = [];
|
||||
}
|
||||
|
||||
private hash(data: string): string {
|
||||
return crypto.createHash('sha256').update(data).digest('hex');
|
||||
}
|
||||
|
||||
private buildTree(nodes: MerkleNode[]): MerkleNode {
|
||||
if (nodes.length === 0) {
|
||||
return new MerkleNode(this.hash(''));
|
||||
}
|
||||
if (nodes.length === 1) {
|
||||
return nodes[0];
|
||||
public addNode(data: string, parentId?: string): string {
|
||||
const nodeId = this.hash(data);
|
||||
const node: MerkleDAGNode = {
|
||||
id: nodeId,
|
||||
hash: nodeId,
|
||||
data,
|
||||
parents: [],
|
||||
children: []
|
||||
};
|
||||
|
||||
// If there's a parent, create the relationship
|
||||
if (parentId) {
|
||||
const parentNode = this.nodes.get(parentId);
|
||||
if (parentNode) {
|
||||
node.parents.push(parentId);
|
||||
parentNode.children.push(nodeId);
|
||||
this.nodes.set(parentId, parentNode);
|
||||
}
|
||||
} else {
|
||||
// If no parent, it's a root node
|
||||
this.rootIds.push(nodeId);
|
||||
}
|
||||
|
||||
const parents: MerkleNode[] = [];
|
||||
for (let i = 0; i < nodes.length; i += 2) {
|
||||
const left = nodes[i];
|
||||
const right = (i + 1 < nodes.length) ? nodes[i + 1] : left;
|
||||
const parentHash = this.hash(left.hash + right.hash);
|
||||
parents.push(new MerkleNode(parentHash, left, right));
|
||||
}
|
||||
|
||||
return this.buildTree(parents);
|
||||
this.nodes.set(nodeId, node);
|
||||
return nodeId;
|
||||
}
|
||||
|
||||
public getRootHash(): string {
|
||||
return this.root.hash;
|
||||
public getNode(nodeId: string): MerkleDAGNode | undefined {
|
||||
return this.nodes.get(nodeId);
|
||||
}
|
||||
|
||||
public static compare(tree1: MerkleTree, tree2: MerkleTree): { added: string[], removed: string[], modified: string[] } {
|
||||
const C1 = new Map(tree1.leaves.map(l => [l.hash, l]));
|
||||
const C2 = new Map(tree2.leaves.map(l => [l.hash, l]));
|
||||
public getAllNodes(): MerkleDAGNode[] {
|
||||
return Array.from(this.nodes.values());
|
||||
}
|
||||
|
||||
const added = Array.from(C2.keys()).filter(k => !C1.has(k));
|
||||
const removed = Array.from(C1.keys()).filter(k => !C2.has(k));
|
||||
|
||||
return { added, removed, modified: [] };
|
||||
public getRootNodes(): MerkleDAGNode[] {
|
||||
return this.rootIds.map(id => this.nodes.get(id)!).filter(Boolean);
|
||||
}
|
||||
|
||||
public getLeafNodes(): MerkleDAGNode[] {
|
||||
return Array.from(this.nodes.values()).filter(node => node.children.length === 0);
|
||||
}
|
||||
|
||||
public serialize(): any {
|
||||
return {
|
||||
root: MerkleNode.serializeNode(this.root),
|
||||
leaves: this.leaves.map(l => MerkleNode.serializeNode(l))
|
||||
nodes: Array.from(this.nodes.entries()),
|
||||
rootIds: this.rootIds
|
||||
};
|
||||
}
|
||||
|
||||
static deserialize(data: any): MerkleTree {
|
||||
const tree = Object.create(MerkleTree.prototype);
|
||||
tree.root = MerkleNode.deserializeNode(data.root);
|
||||
tree.leaves = (data.leaves || []).map((l: any) => MerkleNode.deserializeNode(l));
|
||||
return tree;
|
||||
public static deserialize(data: any): MerkleDAG {
|
||||
const dag = new MerkleDAG();
|
||||
dag.nodes = new Map(data.nodes);
|
||||
dag.rootIds = data.rootIds;
|
||||
return dag;
|
||||
}
|
||||
|
||||
public static compare(dag1: MerkleDAG, dag2: MerkleDAG): { added: string[], removed: string[], modified: string[] } {
|
||||
const nodes1 = new Map(Array.from(dag1.getAllNodes()).map(n => [n.id, n]));
|
||||
const nodes2 = new Map(Array.from(dag2.getAllNodes()).map(n => [n.id, n]));
|
||||
|
||||
const added = Array.from(nodes2.keys()).filter(k => !nodes1.has(k));
|
||||
const removed = Array.from(nodes1.keys()).filter(k => !nodes2.has(k));
|
||||
|
||||
// For modified, we'll check if the data has changed for nodes that exist in both
|
||||
const modified: string[] = [];
|
||||
for (const [id, node1] of Array.from(nodes1.entries())) {
|
||||
const node2 = nodes2.get(id);
|
||||
if (node2 && node1.data !== node2.data) {
|
||||
modified.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
return { added, removed, modified };
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,12 @@
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import * as crypto from 'crypto';
|
||||
import { MerkleTree } from './merkle';
|
||||
import { MerkleDAG } from './merkle';
|
||||
import * as os from 'os';
|
||||
|
||||
export class FileSynchronizer {
|
||||
private fileHashes: Map<string, string>;
|
||||
private merkleTree: MerkleTree;
|
||||
private merkleDAG: MerkleDAG;
|
||||
private rootDir: string;
|
||||
private snapshotPath: string;
|
||||
private ignorePatterns: string[];
|
||||
@@ -15,7 +15,7 @@ export class FileSynchronizer {
|
||||
this.rootDir = rootDir;
|
||||
this.snapshotPath = this.getSnapshotPath(rootDir);
|
||||
this.fileHashes = new Map();
|
||||
this.merkleTree = new MerkleTree([]);
|
||||
this.merkleDAG = new MerkleDAG();
|
||||
this.ignorePatterns = ignorePatterns;
|
||||
}
|
||||
|
||||
@@ -72,7 +72,9 @@ export class FileSynchronizer {
|
||||
// Verify it's really a directory and not ignored
|
||||
if (!this.shouldIgnore(relativePath, true)) {
|
||||
const subHashes = await this.generateFileHashes(fullPath);
|
||||
for (const [p, h] of subHashes) {
|
||||
const entries = Array.from(subHashes.entries());
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
const [p, h] = entries[i];
|
||||
fileHashes.set(p, h);
|
||||
}
|
||||
}
|
||||
@@ -184,16 +186,32 @@ export class FileSynchronizer {
|
||||
return regex.test(text);
|
||||
}
|
||||
|
||||
/**
 * Build a Merkle DAG for a file-hash snapshot: one root node derived from
 * all file hashes, plus one child node per file.
 *
 * Paths are sorted before the root value is accumulated so the root id is
 * deterministic regardless of the Map's insertion order (which depends on
 * filesystem enumeration order).
 */
private buildMerkleDAG(fileHashes: Map<string, string>): MerkleDAG {
    const dag = new MerkleDAG();
    const sortedPaths = Array.from(fileHashes.keys()).sort();

    // Create a root node for the entire directory.
    // (Fix: accumulate over sortedPaths, not raw insertion order, so the
    // root hash is stable across runs and platforms.)
    let valuesString = "";
    for (const p of sortedPaths) {
        valuesString += fileHashes.get(p);
    }
    const rootNodeId = dag.addNode("root:" + valuesString);

    // Add each file as a child of the root.
    for (const path of sortedPaths) {
        const fileData = path + ":" + fileHashes.get(path);
        dag.addNode(fileData, rootNodeId);
    }

    return dag;
}
|
||||
|
||||
/**
 * Load the on-disk snapshot (creating one if absent) and build the
 * in-memory Merkle DAG from the resulting file-hash map.
 */
public async initialize() {
    console.log(`Initializing file synchronizer for ${this.rootDir}`);
    await this.loadSnapshot();
    this.merkleDAG = this.buildMerkleDAG(this.fileHashes);
    console.log(`File synchronizer initialized. Loaded ${this.fileHashes.size} file hashes.`);
}
|
||||
|
||||
@@ -201,27 +219,26 @@ export class FileSynchronizer {
|
||||
console.log('Checking for file changes...');
|
||||
|
||||
const newFileHashes = await this.generateFileHashes(this.rootDir);
|
||||
const newMerkleTree = this.buildMerkleTree(newFileHashes);
|
||||
const newMerkleDAG = this.buildMerkleDAG(newFileHashes);
|
||||
|
||||
if (this.merkleTree.getRootHash() === newMerkleTree.getRootHash()) {
|
||||
console.log('No changes detected based on Merkle root hash.');
|
||||
return { added: [], removed: [], modified: [] };
|
||||
}
|
||||
|
||||
console.log('Merkle root hash has changed. Comparing file states...');
|
||||
const changes = this.compareStates(this.fileHashes, newFileHashes);
|
||||
|
||||
this.fileHashes = newFileHashes;
|
||||
this.merkleTree = newMerkleTree;
|
||||
await this.saveSnapshot();
|
||||
// Compare the DAGs
|
||||
const changes = MerkleDAG.compare(this.merkleDAG, newMerkleDAG);
|
||||
|
||||
// If there are any changes in the DAG, we should also do a file-level comparison
|
||||
if (changes.added.length > 0 || changes.removed.length > 0 || changes.modified.length > 0) {
|
||||
console.log(`Found changes: ${changes.added.length} added, ${changes.removed.length} removed, ${changes.modified.length} modified.`);
|
||||
} else {
|
||||
console.log('No file-level changes detected after detailed comparison.');
|
||||
console.log('Merkle DAG has changed. Comparing file states...');
|
||||
const fileChanges = this.compareStates(this.fileHashes, newFileHashes);
|
||||
|
||||
this.fileHashes = newFileHashes;
|
||||
this.merkleDAG = newMerkleDAG;
|
||||
await this.saveSnapshot();
|
||||
|
||||
console.log(`Found changes: ${fileChanges.added.length} added, ${fileChanges.removed.length} removed, ${fileChanges.modified.length} modified.`);
|
||||
return fileChanges;
|
||||
}
|
||||
|
||||
return changes;
|
||||
console.log('No changes detected based on Merkle DAG comparison.');
|
||||
return { added: [], removed: [], modified: [] };
|
||||
}
|
||||
|
||||
private compareStates(oldHashes: Map<string, string>, newHashes: Map<string, string>): { added: string[], removed: string[], modified: string[] } {
|
||||
@@ -229,7 +246,9 @@ export class FileSynchronizer {
|
||||
const removed: string[] = [];
|
||||
const modified: string[] = [];
|
||||
|
||||
for (const [file, hash] of newHashes.entries()) {
|
||||
const newEntries = Array.from(newHashes.entries());
|
||||
for (let i = 0; i < newEntries.length; i++) {
|
||||
const [file, hash] = newEntries[i];
|
||||
if (!oldHashes.has(file)) {
|
||||
added.push(file);
|
||||
} else if (oldHashes.get(file) !== hash) {
|
||||
@@ -237,7 +256,9 @@ export class FileSynchronizer {
|
||||
}
|
||||
}
|
||||
|
||||
for (const file of oldHashes.keys()) {
|
||||
const oldKeys = Array.from(oldHashes.keys());
|
||||
for (let i = 0; i < oldKeys.length; i++) {
|
||||
const file = oldKeys[i];
|
||||
if (!newHashes.has(file)) {
|
||||
removed.push(file);
|
||||
}
|
||||
@@ -253,9 +274,17 @@ export class FileSynchronizer {
|
||||
private async saveSnapshot(): Promise<void> {
|
||||
const merkleDir = path.dirname(this.snapshotPath);
|
||||
await fs.mkdir(merkleDir, { recursive: true });
|
||||
|
||||
// Convert Map to array without using iterator
|
||||
const fileHashesArray: [string, string][] = [];
|
||||
const keys = Array.from(this.fileHashes.keys());
|
||||
keys.forEach(key => {
|
||||
fileHashesArray.push([key, this.fileHashes.get(key)!]);
|
||||
});
|
||||
|
||||
const data = JSON.stringify({
|
||||
fileHashes: Array.from(this.fileHashes.entries()),
|
||||
merkleTree: this.merkleTree.serialize()
|
||||
fileHashes: fileHashesArray,
|
||||
merkleDAG: this.merkleDAG.serialize()
|
||||
});
|
||||
await fs.writeFile(this.snapshotPath, data, 'utf-8');
|
||||
console.log(`Saved snapshot to ${this.snapshotPath}`);
|
||||
@@ -265,16 +294,22 @@ export class FileSynchronizer {
|
||||
try {
|
||||
const data = await fs.readFile(this.snapshotPath, 'utf-8');
|
||||
const obj = JSON.parse(data);
|
||||
this.fileHashes = new Map(obj.fileHashes);
|
||||
if (obj.merkleTree) {
|
||||
this.merkleTree = MerkleTree.deserialize(obj.merkleTree);
|
||||
|
||||
// Reconstruct Map without using constructor with iterator
|
||||
this.fileHashes = new Map();
|
||||
for (const [key, value] of obj.fileHashes) {
|
||||
this.fileHashes.set(key, value);
|
||||
}
|
||||
|
||||
if (obj.merkleDAG) {
|
||||
this.merkleDAG = MerkleDAG.deserialize(obj.merkleDAG);
|
||||
}
|
||||
console.log(`Loaded snapshot from ${this.snapshotPath}`);
|
||||
} catch (error: any) {
|
||||
if (error.code === 'ENOENT') {
|
||||
console.log(`Snapshot file not found at ${this.snapshotPath}. Generating new one.`);
|
||||
this.fileHashes = await this.generateFileHashes(this.rootDir);
|
||||
this.merkleTree = this.buildMerkleTree(this.fileHashes);
|
||||
this.merkleDAG = this.buildMerkleDAG(this.fileHashes);
|
||||
await this.saveSnapshot();
|
||||
} else {
|
||||
throw error;
|
||||
|
||||
@@ -16,7 +16,8 @@
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"moduleResolution": "node",
|
||||
"composite": true,
|
||||
"allowSyntheticDefaultImports": true,
"downlevelIteration": true
|
||||
},
|
||||
"include": [
|
||||
"src/**/*"
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
"resolveJsonModule": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"baseUrl": ".",
|
||||
"downlevelIteration": true,
|
||||
"paths": {
|
||||
"@zilliz/code-context-core": [
|
||||
"./packages/core/src"
|
||||
|
||||
Reference in New Issue
Block a user