Merge pull request #645 from yamadashy/feat/optimize-token-count

perf: Optimize token counting for top files display
This commit is contained in:
Kazuki Yamada
2025-06-08 18:25:25 +09:00
committed by GitHub
10 changed files with 181 additions and 25 deletions

View File

@@ -119,8 +119,25 @@ npm run test
### Commit Message Format
- Follow [Conventional Commits](https://www.conventionalcommits.org/) with scope: `type(scope): Description`
- **Write all commit messages in English** - both title and body must be in English for consistency
- Write detailed commit messages focusing on the "why" rather than the "what"
- Examples: `feat(cli): Add new --no-progress flag`, `fix(security): Handle special characters in file paths`
- **Include user dialogue context**: Reference the specific conversation or request that led to the change in the commit body
- Format: Use title for technical change, body with clear dialogue section marker and summary
- Start with dialogue type summary, then provide bullet points of the conversation flow
- If user spoke in another language, translate their quotes to English in the commit message
- Examples:
```
feat(cli): Add new --no-progress flag
User requested a feature enhancement for CI automation:
- User asked: "Can we disable progress output for CI environments?"
- User explained: "Progress output creates noise in build logs"
- Assistant implemented --no-progress flag for automation compatibility
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
```
## Pull Request Review Process

View File

@@ -64,7 +64,12 @@ export const runDefaultAction = async (
logger.log('');
if (config.output.topFilesLength > 0) {
printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength);
printTopFiles(
packResult.fileCharCounts,
packResult.fileTokenCounts,
config.output.topFilesLength,
packResult.totalTokens,
);
logger.log('');
}

View File

@@ -22,8 +22,8 @@ export const printSummary = (packResult: PackResult, config: RepomixConfigMerged
logger.log(pc.white('📊 Pack Summary:'));
logger.log(pc.dim('────────────────'));
logger.log(`${pc.white(' Total Files:')} ${pc.white(packResult.totalFiles.toLocaleString())} files`);
logger.log(`${pc.white(' Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
logger.log(`${pc.white(' Total Tokens:')} ${pc.white(packResult.totalTokens.toLocaleString())} tokens`);
logger.log(`${pc.white(' Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
logger.log(`${pc.white(' Output:')} ${pc.white(config.output.filePath)}`);
logger.log(`${pc.white(' Security:')} ${pc.white(securityCheckMessage)}`);
@@ -84,24 +84,26 @@ export const printTopFiles = (
fileCharCounts: Record<string, number>,
fileTokenCounts: Record<string, number>,
topFilesLength: number,
totalTokens: number,
) => {
const topFilesLengthStrLen = topFilesLength.toString().length;
logger.log(pc.white(`📈 Top ${topFilesLength} Files by Character Count and Token Count:`));
logger.log(pc.white(`📈 Top ${topFilesLength} Files by Token Count:`));
logger.log(pc.dim(`─────────────────────────────────────────────────${'─'.repeat(topFilesLengthStrLen)}`));
const topFiles = Object.entries(fileCharCounts)
// Filter files that have token counts (top candidates by char count)
const filesWithTokenCounts = Object.entries(fileTokenCounts)
.filter(([, tokenCount]) => tokenCount > 0)
.sort((a, b) => b[1] - a[1])
.slice(0, topFilesLength);
// Calculate total token count
const totalTokens = Object.values(fileTokenCounts).reduce((sum, count) => sum + count, 0);
// Use the actual total tokens from the entire output
topFiles.forEach(([filePath, charCount], index) => {
const tokenCount = fileTokenCounts[filePath];
filesWithTokenCounts.forEach(([filePath, tokenCount], index) => {
const charCount = fileCharCounts[filePath];
const percentageOfTotal = totalTokens > 0 ? Number(((tokenCount / totalTokens) * 100).toFixed(1)) : 0;
const indexString = `${index + 1}.`.padEnd(3, ' ');
logger.log(
`${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${charCount.toLocaleString()} chars, ${tokenCount.toLocaleString()} tokens, ${percentageOfTotal}%)`)}`,
`${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${tokenCount.toLocaleString()} tokens, ${charCount.toLocaleString()} chars, ${percentageOfTotal}%)`)}`,
);
});
};

View File

@@ -57,3 +57,57 @@ export const calculateAllFileMetrics = async (
throw error;
}
};
/**
 * Calculates metrics only for the processed files whose path appears in `targetFilePaths`.
 *
 * Files are kept in the order they have in `processedFiles`; paths in `targetFilePaths`
 * that match no processed file are ignored. When nothing matches, an empty array is
 * returned without initialising a task runner.
 *
 * @param processedFiles - Candidate files to pick from.
 * @param targetFilePaths - Paths of the files that should be measured.
 * @param tokenCounterEncoding - Encoding forwarded to every metrics task.
 * @param progressCallback - Invoked with a progress message each time a task finishes.
 * @param deps - Injectable dependencies; `initTaskRunner` supplies the function that runs a task.
 * @returns The task results, in the same order as the selected files.
 * @throws Rethrows any error raised while running the tasks, after logging it.
 */
export const calculateSelectiveFileMetrics = async (
  processedFiles: ProcessedFile[],
  targetFilePaths: string[],
  tokenCounterEncoding: TiktokenEncoding,
  progressCallback: RepomixProgressCallback,
  deps = {
    initTaskRunner,
  },
): Promise<FileMetrics[]> => {
  // Set lookup keeps the membership test cheap for every processed file.
  const wantedPaths = new Set(targetFilePaths);
  const selectedFiles = processedFiles.filter(({ path }) => wantedPaths.has(path));
  const selectedCount = selectedFiles.length;

  // Nothing to measure: bail out before a task runner is created.
  if (selectedCount === 0) {
    return [];
  }

  const runTask = deps.initTaskRunner(selectedCount);
  const tasks = selectedFiles.map(
    (file, index) =>
      ({
        file,
        index,
        totalFiles: selectedCount,
        encoding: tokenCounterEncoding,
      }) satisfies FileMetricsTask,
  );

  try {
    const startedAt = process.hrtime.bigint();
    logger.trace(`Starting selective metrics calculation for ${selectedCount} files using worker pool`);

    // Counts finished tasks in completion order, which may differ from submission order.
    let finishedCount = 0;
    const reportCompletion = <TResult>(task: (typeof tasks)[number], result: TResult): TResult => {
      finishedCount++;
      progressCallback(`Calculating metrics... (${finishedCount}/${task.totalFiles}) ${pc.dim(task.file.path)}`);
      logger.trace(`Calculating metrics... (${finishedCount}/${task.totalFiles}) ${task.file.path}`);
      return result;
    };

    const pendingResults = tasks.map((task) => runTask(task).then((result) => reportCompletion(task, result)));
    const results = await Promise.all(pendingResults);

    // hrtime is in nanoseconds; convert the elapsed time to milliseconds for the log.
    const finishedAt = process.hrtime.bigint();
    const elapsedMs = Number(finishedAt - startedAt) / 1e6;
    logger.trace(`Selective metrics calculation completed in ${elapsedMs.toFixed(2)}ms`);

    return results;
  } catch (error) {
    logger.error('Error during selective metrics calculation:', error);
    throw error;
  }
};

View File

@@ -2,7 +2,7 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import type { ProcessedFile } from '../file/fileTypes.js';
import type { GitDiffResult } from '../git/gitDiffHandle.js';
import { calculateAllFileMetrics } from './calculateAllFileMetrics.js';
import { calculateAllFileMetrics, calculateSelectiveFileMetrics } from './calculateAllFileMetrics.js';
import { calculateOutputMetrics } from './calculateOutputMetrics.js';
export interface CalculateMetricsResult {
@@ -24,6 +24,7 @@ export const calculateMetrics = async (
gitDiffResult: GitDiffResult | undefined,
deps = {
calculateAllFileMetrics,
calculateSelectiveFileMetrics,
calculateOutputMetrics,
},
): Promise<CalculateMetricsResult> => {
@@ -46,18 +47,34 @@ export const calculateMetrics = async (
tokenCounter.free();
}
const [fileMetrics, totalTokens] = await Promise.all([
deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback),
// For top files display optimization: calculate token counts only for top files by character count
const topFilesLength = config.output.topFilesLength;
const candidateFilesCount = Math.min(processedFiles.length, Math.max(topFilesLength * 10, topFilesLength));
// Get top files by character count first
const topFilesByChar = [...processedFiles]
.sort((a, b) => b.content.length - a.content.length)
.slice(0, candidateFilesCount);
const topFilePaths = topFilesByChar.map((file) => file.path);
const [selectiveFileMetrics, totalTokens] = await Promise.all([
deps.calculateSelectiveFileMetrics(processedFiles, topFilePaths, config.tokenCount.encoding, progressCallback),
deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath),
]);
const totalFiles = processedFiles.length;
const totalCharacters = output.length;
// Build character counts for all files
const fileCharCounts: Record<string, number> = {};
for (const file of processedFiles) {
fileCharCounts[file.path] = file.content.length;
}
// Build token counts only for top files
const fileTokenCounts: Record<string, number> = {};
for (const file of fileMetrics) {
fileCharCounts[file.path] = file.charCount;
for (const file of selectiveFileMetrics) {
fileTokenCounts[file.path] = file.tokenCount;
}

View File

@@ -135,7 +135,7 @@ describe('cliPrint', () => {
'README.md': 400,
};
printTopFiles(fileCharCounts, fileTokenCounts, 2);
printTopFiles(fileCharCounts, fileTokenCounts, 2, 60);
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 2 Files'));
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('README.md'));
@@ -144,7 +144,7 @@ describe('cliPrint', () => {
});
test('should handle empty file list', () => {
printTopFiles({}, {}, 5);
printTopFiles({}, {}, 5, 0);
expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 5 Files'));
});

View File

@@ -1,6 +1,9 @@
import { describe, expect, it, vi } from 'vitest';
import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
import {
calculateAllFileMetrics,
calculateSelectiveFileMetrics,
} from '../../../src/core/metrics/calculateAllFileMetrics.js';
import type { FileMetricsTask } from '../../../src/core/metrics/workers/fileMetricsWorker.js';
import fileMetricsWorker from '../../../src/core/metrics/workers/fileMetricsWorker.js';
import type { RepomixProgressCallback } from '../../../src/shared/types.js';
@@ -32,4 +35,47 @@ describe('calculateAllFileMetrics', () => {
{ path: 'file2.txt', charCount: 200, tokenCount: 50 },
]);
});
it('should calculate metrics for selective files only', async () => {
const processedFiles: ProcessedFile[] = [
{ path: 'file1.txt', content: 'a'.repeat(100) },
{ path: 'file2.txt', content: 'b'.repeat(200) },
{ path: 'file3.txt', content: 'c'.repeat(300) },
];
const targetFilePaths = ['file1.txt', 'file3.txt'];
const progressCallback: RepomixProgressCallback = vi.fn();
const result = await calculateSelectiveFileMetrics(
processedFiles,
targetFilePaths,
'o200k_base',
progressCallback,
{
initTaskRunner: mockInitTaskRunner,
},
);
expect(result).toEqual([
{ path: 'file1.txt', charCount: 100, tokenCount: 13 },
{ path: 'file3.txt', charCount: 300, tokenCount: 75 },
]);
});
it('should return empty array when no target files match', async () => {
const processedFiles: ProcessedFile[] = [{ path: 'file1.txt', content: 'a'.repeat(100) }];
const targetFilePaths = ['nonexistent.txt'];
const progressCallback: RepomixProgressCallback = vi.fn();
const result = await calculateSelectiveFileMetrics(
processedFiles,
targetFilePaths,
'o200k_base',
progressCallback,
{
initTaskRunner: mockInitTaskRunner,
},
);
expect(result).toEqual([]);
});
});

View File

@@ -2,7 +2,10 @@ import { type Mock, describe, expect, it, vi } from 'vitest';
import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
import type { GitDiffResult } from '../../../src/core/git/gitDiffHandle.js';
import { TokenCounter } from '../../../src/core/metrics/TokenCounter.js';
import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
import {
calculateAllFileMetrics,
calculateSelectiveFileMetrics,
} from '../../../src/core/metrics/calculateAllFileMetrics.js';
import { calculateMetrics } from '../../../src/core/metrics/calculateMetrics.js';
import type { RepomixProgressCallback } from '../../../src/shared/types.js';
import { createMockConfig } from '../../testing/testUtils.js';
@@ -16,7 +19,10 @@ vi.mock('../../../src/core/metrics/TokenCounter.js', () => {
};
});
vi.mock('../../../src/core/metrics/aggregateMetrics.js');
vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js');
vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js', () => ({
calculateAllFileMetrics: vi.fn(),
calculateSelectiveFileMetrics: vi.fn(),
}));
describe('calculateMetrics', () => {
it('should calculate metrics and return the result', async () => {
@@ -31,7 +37,7 @@ describe('calculateMetrics', () => {
{ path: 'file1.txt', charCount: 100, tokenCount: 10 },
{ path: 'file2.txt', charCount: 200, tokenCount: 20 },
];
(calculateAllFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);
(calculateSelectiveFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);
const aggregatedResult = {
totalFiles: 2,
@@ -54,11 +60,17 @@ describe('calculateMetrics', () => {
const result = await calculateMetrics(processedFiles, output, progressCallback, config, gitDiffResult, {
calculateAllFileMetrics,
calculateSelectiveFileMetrics,
calculateOutputMetrics: () => Promise.resolve(30),
});
expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...');
expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, 'o200k_base', progressCallback);
expect(calculateSelectiveFileMetrics).toHaveBeenCalledWith(
processedFiles,
['file2.txt', 'file1.txt'], // sorted by character count desc
'o200k_base',
progressCallback,
);
expect(result).toEqual(aggregatedResult);
});
});

View File

@@ -108,6 +108,7 @@ index 123..456 100644
},
{
calculateAllFileMetrics: mockCalculateAllFileMetrics,
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
calculateOutputMetrics: mockCalculateOutputMetrics,
},
);
@@ -190,6 +191,7 @@ index 123..456 100644
undefined, // No diff content
{
calculateAllFileMetrics: mockCalculateAllFileMetrics,
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
calculateOutputMetrics: mockCalculateOutputMetrics,
},
);
@@ -267,6 +269,7 @@ index 123..456 100644
undefined, // No diff content
{
calculateAllFileMetrics: mockCalculateAllFileMetrics,
calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
calculateOutputMetrics: mockCalculateOutputMetrics,
},
);

View File

@@ -186,10 +186,10 @@ onUnmounted(() => {
<dl v-if="result.metadata.summary">
<dt>Total Files</dt>
<dd>{{ result.metadata.summary.totalFiles.toLocaleString() }} <span class="unit">files</span></dd>
<dt>Total Size</dt>
<dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
<dt>Total Tokens</dt>
<dd>{{ result.metadata.summary.totalTokens.toLocaleString() }} <span class="unit">tokens</span></dd>
<dt>Total Size</dt>
<dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
</dl>
</div>
@@ -199,7 +199,7 @@ onUnmounted(() => {
<li v-for="file in result.metadata.topFiles" :key="file.path">
<div class="file-path">{{ file.path }}</div>
<div class="file-stats">
{{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
{{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
</div>
</li>
</ol>