mirror of
https://github.com/yamadashy/repomix.git
synced 2025-06-11 00:25:54 +03:00
Merge pull request #645 from yamadashy/feat/optimize-token-count
perf: Optimize token counting for top files display
This commit is contained in:
19
CLAUDE.md
19
CLAUDE.md
@@ -119,8 +119,25 @@ npm run test
|
||||
|
||||
### Commit Message Format
|
||||
- Follow [Conventional Commits](https://www.conventionalcommits.org/) with scope: `type(scope): Description`
|
||||
- **Write all commit messages in English** - both title and body must be in English for consistency
|
||||
- Write detailed commit messages focusing on the "why" rather than the "what"
|
||||
- Examples: `feat(cli): Add new --no-progress flag`, `fix(security): Handle special characters in file paths`
|
||||
- **Include user dialogue context**: Reference the specific conversation or request that led to the change in the commit body
|
||||
- Format: Use title for technical change, body with clear dialogue section marker and summary
|
||||
- Start with dialogue type summary, then provide bullet points of the conversation flow
|
||||
- If user spoke in another language, translate their quotes to English in the commit message
|
||||
- Examples:
|
||||
```
|
||||
feat(cli): Add new --no-progress flag
|
||||
|
||||
User requested a feature enhancement for CI automation:
|
||||
- User asked: "Can we disable progress output for CI environments?"
|
||||
- User explained: "Progress output creates noise in build logs"
|
||||
- Assistant implemented --no-progress flag for automation compatibility
|
||||
|
||||
🤖 Generated with [Claude Code](https://claude.ai/code)
|
||||
|
||||
Co-Authored-By: Claude <noreply@anthropic.com>
|
||||
```
|
||||
|
||||
## Pull Request Review Process
|
||||
|
||||
|
||||
@@ -64,7 +64,12 @@ export const runDefaultAction = async (
|
||||
logger.log('');
|
||||
|
||||
if (config.output.topFilesLength > 0) {
|
||||
printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength);
|
||||
printTopFiles(
|
||||
packResult.fileCharCounts,
|
||||
packResult.fileTokenCounts,
|
||||
config.output.topFilesLength,
|
||||
packResult.totalTokens,
|
||||
);
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ export const printSummary = (packResult: PackResult, config: RepomixConfigMerged
|
||||
logger.log(pc.white('📊 Pack Summary:'));
|
||||
logger.log(pc.dim('────────────────'));
|
||||
logger.log(`${pc.white(' Total Files:')} ${pc.white(packResult.totalFiles.toLocaleString())} files`);
|
||||
logger.log(`${pc.white(' Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
|
||||
logger.log(`${pc.white(' Total Tokens:')} ${pc.white(packResult.totalTokens.toLocaleString())} tokens`);
|
||||
logger.log(`${pc.white(' Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
|
||||
logger.log(`${pc.white(' Output:')} ${pc.white(config.output.filePath)}`);
|
||||
logger.log(`${pc.white(' Security:')} ${pc.white(securityCheckMessage)}`);
|
||||
|
||||
@@ -84,24 +84,26 @@ export const printTopFiles = (
|
||||
fileCharCounts: Record<string, number>,
|
||||
fileTokenCounts: Record<string, number>,
|
||||
topFilesLength: number,
|
||||
totalTokens: number,
|
||||
) => {
|
||||
const topFilesLengthStrLen = topFilesLength.toString().length;
|
||||
logger.log(pc.white(`📈 Top ${topFilesLength} Files by Character Count and Token Count:`));
|
||||
logger.log(pc.white(`📈 Top ${topFilesLength} Files by Token Count:`));
|
||||
logger.log(pc.dim(`─────────────────────────────────────────────────${'─'.repeat(topFilesLengthStrLen)}`));
|
||||
|
||||
const topFiles = Object.entries(fileCharCounts)
|
||||
// Filter files that have token counts (top candidates by char count)
|
||||
const filesWithTokenCounts = Object.entries(fileTokenCounts)
|
||||
.filter(([, tokenCount]) => tokenCount > 0)
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, topFilesLength);
|
||||
|
||||
// Calculate total token count
|
||||
const totalTokens = Object.values(fileTokenCounts).reduce((sum, count) => sum + count, 0);
|
||||
// Use the actual total tokens from the entire output
|
||||
|
||||
topFiles.forEach(([filePath, charCount], index) => {
|
||||
const tokenCount = fileTokenCounts[filePath];
|
||||
filesWithTokenCounts.forEach(([filePath, tokenCount], index) => {
|
||||
const charCount = fileCharCounts[filePath];
|
||||
const percentageOfTotal = totalTokens > 0 ? Number(((tokenCount / totalTokens) * 100).toFixed(1)) : 0;
|
||||
const indexString = `${index + 1}.`.padEnd(3, ' ');
|
||||
logger.log(
|
||||
`${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${charCount.toLocaleString()} chars, ${tokenCount.toLocaleString()} tokens, ${percentageOfTotal}%)`)}`,
|
||||
`${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${tokenCount.toLocaleString()} tokens, ${charCount.toLocaleString()} chars, ${percentageOfTotal}%)`)}`,
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
@@ -57,3 +57,57 @@ export const calculateAllFileMetrics = async (
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
export const calculateSelectiveFileMetrics = async (
|
||||
processedFiles: ProcessedFile[],
|
||||
targetFilePaths: string[],
|
||||
tokenCounterEncoding: TiktokenEncoding,
|
||||
progressCallback: RepomixProgressCallback,
|
||||
deps = {
|
||||
initTaskRunner,
|
||||
},
|
||||
): Promise<FileMetrics[]> => {
|
||||
const targetFileSet = new Set(targetFilePaths);
|
||||
const filesToProcess = processedFiles.filter((file) => targetFileSet.has(file.path));
|
||||
|
||||
if (filesToProcess.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const runTask = deps.initTaskRunner(filesToProcess.length);
|
||||
const tasks = filesToProcess.map(
|
||||
(file, index) =>
|
||||
({
|
||||
file,
|
||||
index,
|
||||
totalFiles: filesToProcess.length,
|
||||
encoding: tokenCounterEncoding,
|
||||
}) satisfies FileMetricsTask,
|
||||
);
|
||||
|
||||
try {
|
||||
const startTime = process.hrtime.bigint();
|
||||
logger.trace(`Starting selective metrics calculation for ${filesToProcess.length} files using worker pool`);
|
||||
|
||||
let completedTasks = 0;
|
||||
const results = await Promise.all(
|
||||
tasks.map((task) =>
|
||||
runTask(task).then((result) => {
|
||||
completedTasks++;
|
||||
progressCallback(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${pc.dim(task.file.path)}`);
|
||||
logger.trace(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${task.file.path}`);
|
||||
return result;
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
const endTime = process.hrtime.bigint();
|
||||
const duration = Number(endTime - startTime) / 1e6;
|
||||
logger.trace(`Selective metrics calculation completed in ${duration.toFixed(2)}ms`);
|
||||
|
||||
return results;
|
||||
} catch (error) {
|
||||
logger.error('Error during selective metrics calculation:', error);
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js';
|
||||
import type { RepomixProgressCallback } from '../../shared/types.js';
|
||||
import type { ProcessedFile } from '../file/fileTypes.js';
|
||||
import type { GitDiffResult } from '../git/gitDiffHandle.js';
|
||||
import { calculateAllFileMetrics } from './calculateAllFileMetrics.js';
|
||||
import { calculateAllFileMetrics, calculateSelectiveFileMetrics } from './calculateAllFileMetrics.js';
|
||||
import { calculateOutputMetrics } from './calculateOutputMetrics.js';
|
||||
|
||||
export interface CalculateMetricsResult {
|
||||
@@ -24,6 +24,7 @@ export const calculateMetrics = async (
|
||||
gitDiffResult: GitDiffResult | undefined,
|
||||
deps = {
|
||||
calculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics,
|
||||
calculateOutputMetrics,
|
||||
},
|
||||
): Promise<CalculateMetricsResult> => {
|
||||
@@ -46,18 +47,34 @@ export const calculateMetrics = async (
|
||||
tokenCounter.free();
|
||||
}
|
||||
|
||||
const [fileMetrics, totalTokens] = await Promise.all([
|
||||
deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback),
|
||||
// For top files display optimization: calculate token counts only for top files by character count
|
||||
const topFilesLength = config.output.topFilesLength;
|
||||
const candidateFilesCount = Math.min(processedFiles.length, Math.max(topFilesLength * 10, topFilesLength));
|
||||
|
||||
// Get top files by character count first
|
||||
const topFilesByChar = [...processedFiles]
|
||||
.sort((a, b) => b.content.length - a.content.length)
|
||||
.slice(0, candidateFilesCount);
|
||||
|
||||
const topFilePaths = topFilesByChar.map((file) => file.path);
|
||||
|
||||
const [selectiveFileMetrics, totalTokens] = await Promise.all([
|
||||
deps.calculateSelectiveFileMetrics(processedFiles, topFilePaths, config.tokenCount.encoding, progressCallback),
|
||||
deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath),
|
||||
]);
|
||||
|
||||
const totalFiles = processedFiles.length;
|
||||
const totalCharacters = output.length;
|
||||
|
||||
// Build character counts for all files
|
||||
const fileCharCounts: Record<string, number> = {};
|
||||
for (const file of processedFiles) {
|
||||
fileCharCounts[file.path] = file.content.length;
|
||||
}
|
||||
|
||||
// Build token counts only for top files
|
||||
const fileTokenCounts: Record<string, number> = {};
|
||||
for (const file of fileMetrics) {
|
||||
fileCharCounts[file.path] = file.charCount;
|
||||
for (const file of selectiveFileMetrics) {
|
||||
fileTokenCounts[file.path] = file.tokenCount;
|
||||
}
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ describe('cliPrint', () => {
|
||||
'README.md': 400,
|
||||
};
|
||||
|
||||
printTopFiles(fileCharCounts, fileTokenCounts, 2);
|
||||
printTopFiles(fileCharCounts, fileTokenCounts, 2, 60);
|
||||
|
||||
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 2 Files'));
|
||||
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('README.md'));
|
||||
@@ -144,7 +144,7 @@ describe('cliPrint', () => {
|
||||
});
|
||||
|
||||
test('should handle empty file list', () => {
|
||||
printTopFiles({}, {}, 5);
|
||||
printTopFiles({}, {}, 5, 0);
|
||||
|
||||
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 5 Files'));
|
||||
});
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
|
||||
import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
|
||||
import {
|
||||
calculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics,
|
||||
} from '../../../src/core/metrics/calculateAllFileMetrics.js';
|
||||
import type { FileMetricsTask } from '../../../src/core/metrics/workers/fileMetricsWorker.js';
|
||||
import fileMetricsWorker from '../../../src/core/metrics/workers/fileMetricsWorker.js';
|
||||
import type { RepomixProgressCallback } from '../../../src/shared/types.js';
|
||||
@@ -32,4 +35,47 @@ describe('calculateAllFileMetrics', () => {
|
||||
{ path: 'file2.txt', charCount: 200, tokenCount: 50 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should calculate metrics for selective files only', async () => {
|
||||
const processedFiles: ProcessedFile[] = [
|
||||
{ path: 'file1.txt', content: 'a'.repeat(100) },
|
||||
{ path: 'file2.txt', content: 'b'.repeat(200) },
|
||||
{ path: 'file3.txt', content: 'c'.repeat(300) },
|
||||
];
|
||||
const targetFilePaths = ['file1.txt', 'file3.txt'];
|
||||
const progressCallback: RepomixProgressCallback = vi.fn();
|
||||
|
||||
const result = await calculateSelectiveFileMetrics(
|
||||
processedFiles,
|
||||
targetFilePaths,
|
||||
'o200k_base',
|
||||
progressCallback,
|
||||
{
|
||||
initTaskRunner: mockInitTaskRunner,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result).toEqual([
|
||||
{ path: 'file1.txt', charCount: 100, tokenCount: 13 },
|
||||
{ path: 'file3.txt', charCount: 300, tokenCount: 75 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return empty array when no target files match', async () => {
|
||||
const processedFiles: ProcessedFile[] = [{ path: 'file1.txt', content: 'a'.repeat(100) }];
|
||||
const targetFilePaths = ['nonexistent.txt'];
|
||||
const progressCallback: RepomixProgressCallback = vi.fn();
|
||||
|
||||
const result = await calculateSelectiveFileMetrics(
|
||||
processedFiles,
|
||||
targetFilePaths,
|
||||
'o200k_base',
|
||||
progressCallback,
|
||||
{
|
||||
initTaskRunner: mockInitTaskRunner,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,7 +2,10 @@ import { type Mock, describe, expect, it, vi } from 'vitest';
|
||||
import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
|
||||
import type { GitDiffResult } from '../../../src/core/git/gitDiffHandle.js';
|
||||
import { TokenCounter } from '../../../src/core/metrics/TokenCounter.js';
|
||||
import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
|
||||
import {
|
||||
calculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics,
|
||||
} from '../../../src/core/metrics/calculateAllFileMetrics.js';
|
||||
import { calculateMetrics } from '../../../src/core/metrics/calculateMetrics.js';
|
||||
import type { RepomixProgressCallback } from '../../../src/shared/types.js';
|
||||
import { createMockConfig } from '../../testing/testUtils.js';
|
||||
@@ -16,7 +19,10 @@ vi.mock('../../../src/core/metrics/TokenCounter.js', () => {
|
||||
};
|
||||
});
|
||||
vi.mock('../../../src/core/metrics/aggregateMetrics.js');
|
||||
vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js');
|
||||
vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js', () => ({
|
||||
calculateAllFileMetrics: vi.fn(),
|
||||
calculateSelectiveFileMetrics: vi.fn(),
|
||||
}));
|
||||
|
||||
describe('calculateMetrics', () => {
|
||||
it('should calculate metrics and return the result', async () => {
|
||||
@@ -31,7 +37,7 @@ describe('calculateMetrics', () => {
|
||||
{ path: 'file1.txt', charCount: 100, tokenCount: 10 },
|
||||
{ path: 'file2.txt', charCount: 200, tokenCount: 20 },
|
||||
];
|
||||
(calculateAllFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);
|
||||
(calculateSelectiveFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);
|
||||
|
||||
const aggregatedResult = {
|
||||
totalFiles: 2,
|
||||
@@ -54,11 +60,17 @@ describe('calculateMetrics', () => {
|
||||
|
||||
const result = await calculateMetrics(processedFiles, output, progressCallback, config, gitDiffResult, {
|
||||
calculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics,
|
||||
calculateOutputMetrics: () => Promise.resolve(30),
|
||||
});
|
||||
|
||||
expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...');
|
||||
expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, 'o200k_base', progressCallback);
|
||||
expect(calculateSelectiveFileMetrics).toHaveBeenCalledWith(
|
||||
processedFiles,
|
||||
['file2.txt', 'file1.txt'], // sorted by character count desc
|
||||
'o200k_base',
|
||||
progressCallback,
|
||||
);
|
||||
expect(result).toEqual(aggregatedResult);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -108,6 +108,7 @@ index 123..456 100644
|
||||
},
|
||||
{
|
||||
calculateAllFileMetrics: mockCalculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
|
||||
calculateOutputMetrics: mockCalculateOutputMetrics,
|
||||
},
|
||||
);
|
||||
@@ -190,6 +191,7 @@ index 123..456 100644
|
||||
undefined, // No diff content
|
||||
{
|
||||
calculateAllFileMetrics: mockCalculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
|
||||
calculateOutputMetrics: mockCalculateOutputMetrics,
|
||||
},
|
||||
);
|
||||
@@ -267,6 +269,7 @@ index 123..456 100644
|
||||
undefined, // No diff content
|
||||
{
|
||||
calculateAllFileMetrics: mockCalculateAllFileMetrics,
|
||||
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
|
||||
calculateOutputMetrics: mockCalculateOutputMetrics,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -186,10 +186,10 @@ onUnmounted(() => {
|
||||
<dl v-if="result.metadata.summary">
|
||||
<dt>Total Files</dt>
|
||||
<dd>{{ result.metadata.summary.totalFiles.toLocaleString() }} <span class="unit">files</span></dd>
|
||||
<dt>Total Size</dt>
|
||||
<dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
|
||||
<dt>Total Tokens</dt>
|
||||
<dd>{{ result.metadata.summary.totalTokens.toLocaleString() }} <span class="unit">tokens</span></dd>
|
||||
<dt>Total Size</dt>
|
||||
<dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
|
||||
</dl>
|
||||
</div>
|
||||
|
||||
@@ -199,7 +199,7 @@ onUnmounted(() => {
|
||||
<li v-for="file in result.metadata.topFiles" :key="file.path">
|
||||
<div class="file-path">{{ file.path }}</div>
|
||||
<div class="file-stats">
|
||||
{{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
|
||||
{{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
|
||||
</div>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
Reference in New Issue
Block a user