Merge pull request #645 from yamadashy/feat/optimize-token-count

perf: Optimize token counting for top files display
2025-06-11 00:25:54 +03:00 · 2025-06-08 18:25:25 +09:00
parent 4684c6d712 a8f4301587
commit 0d0f20ec55
10 changed files with 181 additions and 25 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -119,8 +119,25 @@ npm run test

 ### Commit Message Format
 - Follow [Conventional Commits](https://www.conventionalcommits.org/) with scope: `type(scope): Description`
+- **Write all commit messages in English** - both title and body must be in English for consistency
 - Write detailed commit messages focusing on the "why" rather than the "what"
- Examples: `feat(cli): Add new --no-progress flag`, `fix(security): Handle special characters in file paths`
+- **Include user dialogue context**: Reference the specific conversation or request that led to the change in the commit body
+- Format: Use the title for the technical change; in the body, add a clearly marked dialogue section with a summary
+- Start with a one-line summary of the dialogue type, then list the conversation flow as bullet points
+- If user spoke in another language, translate their quotes to English in the commit message
+- Examples: 
+  ```
+  feat(cli): Add new --no-progress flag
+  
+  User requested a feature enhancement for CI automation:
+  - User asked: "Can we disable progress output for CI environments?"
+  - User explained: "Progress output creates noise in build logs"
+  - Assistant implemented --no-progress flag for automation compatibility
+  
+  🤖 Generated with [Claude Code](https://claude.ai/code)
+  
+  Co-Authored-By: Claude <noreply@anthropic.com>
+  ```

 ## Pull Request Review Process

--- a/src/cli/actions/defaultAction.ts
+++ b/src/cli/actions/defaultAction.ts
@@ -64,7 +64,12 @@ export const runDefaultAction = async (
  logger.log('');

  if (config.output.topFilesLength > 0) {
-    printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength);
+    printTopFiles(
+      packResult.fileCharCounts,
+      packResult.fileTokenCounts,
+      config.output.topFilesLength,
+      packResult.totalTokens,
+    );
    logger.log('');
  }

--- a/src/cli/cliPrint.ts
+++ b/src/cli/cliPrint.ts
@@ -22,8 +22,8 @@ export const printSummary = (packResult: PackResult, config: RepomixConfigMerged
  logger.log(pc.white('📊 Pack Summary:'));
  logger.log(pc.dim('────────────────'));
  logger.log(`${pc.white('  Total Files:')} ${pc.white(packResult.totalFiles.toLocaleString())} files`);
-  logger.log(`${pc.white('  Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
  logger.log(`${pc.white(' Total Tokens:')} ${pc.white(packResult.totalTokens.toLocaleString())} tokens`);
+  logger.log(`${pc.white('  Total Chars:')} ${pc.white(packResult.totalCharacters.toLocaleString())} chars`);
  logger.log(`${pc.white('       Output:')} ${pc.white(config.output.filePath)}`);
  logger.log(`${pc.white('     Security:')} ${pc.white(securityCheckMessage)}`);

@@ -84,24 +84,26 @@ export const printTopFiles = (
  fileCharCounts: Record<string, number>,
  fileTokenCounts: Record<string, number>,
  topFilesLength: number,
+  totalTokens: number,
 ) => {
  const topFilesLengthStrLen = topFilesLength.toString().length;
-  logger.log(pc.white(`📈 Top ${topFilesLength} Files by Character Count and Token Count:`));
+  logger.log(pc.white(`📈 Top ${topFilesLength} Files by Token Count:`));
  logger.log(pc.dim(`─────────────────────────────────────────────────${'─'.repeat(topFilesLengthStrLen)}`));

-  const topFiles = Object.entries(fileCharCounts)
+  // Only the top candidates by char count have token counts; rank those by token count
+  const filesWithTokenCounts = Object.entries(fileTokenCounts)
+    .filter(([, tokenCount]) => tokenCount > 0)
    .sort((a, b) => b[1] - a[1])
    .slice(0, topFilesLength);

-  // Calculate total token count
-  const totalTokens = Object.values(fileTokenCounts).reduce((sum, count) => sum + count, 0);
+  // totalTokens is supplied by the caller (the token count of the entire output), so percentages are relative to the whole output

-  topFiles.forEach(([filePath, charCount], index) => {
-    const tokenCount = fileTokenCounts[filePath];
+  filesWithTokenCounts.forEach(([filePath, tokenCount], index) => {
+    const charCount = fileCharCounts[filePath];
    const percentageOfTotal = totalTokens > 0 ? Number(((tokenCount / totalTokens) * 100).toFixed(1)) : 0;
    const indexString = `${index + 1}.`.padEnd(3, ' ');
    logger.log(
-      `${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${charCount.toLocaleString()} chars, ${tokenCount.toLocaleString()} tokens, ${percentageOfTotal}%)`)}`,
+      `${pc.white(`${indexString}`)} ${pc.white(filePath)} ${pc.dim(`(${tokenCount.toLocaleString()} tokens, ${charCount.toLocaleString()} chars, ${percentageOfTotal}%)`)}`,
    );
  });
 };
--- a/src/core/metrics/calculateAllFileMetrics.ts
+++ b/src/core/metrics/calculateAllFileMetrics.ts
@@ -57,3 +57,57 @@ export const calculateAllFileMetrics = async (
    throw error;
  }
 };
+
+export const calculateSelectiveFileMetrics = async (
+  processedFiles: ProcessedFile[],
+  targetFilePaths: string[],
+  tokenCounterEncoding: TiktokenEncoding,
+  progressCallback: RepomixProgressCallback,
+  deps = {
+    initTaskRunner,
+  },
+): Promise<FileMetrics[]> => {
+  const targetFileSet = new Set(targetFilePaths);
+  const filesToProcess = processedFiles.filter((file) => targetFileSet.has(file.path));
+
+  if (filesToProcess.length === 0) {
+    return [];
+  }
+
+  const runTask = deps.initTaskRunner(filesToProcess.length);
+  const tasks = filesToProcess.map(
+    (file, index) =>
+      ({
+        file,
+        index,
+        totalFiles: filesToProcess.length,
+        encoding: tokenCounterEncoding,
+      }) satisfies FileMetricsTask,
+  );
+
+  try {
+    const startTime = process.hrtime.bigint();
+    logger.trace(`Starting selective metrics calculation for ${filesToProcess.length} files using worker pool`);
+
+    let completedTasks = 0;
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${pc.dim(task.file.path)}`);
+          logger.trace(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${task.file.path}`);
+          return result;
+        }),
+      ),
+    );
+
+    const endTime = process.hrtime.bigint();
+    const duration = Number(endTime - startTime) / 1e6;
+    logger.trace(`Selective metrics calculation completed in ${duration.toFixed(2)}ms`);
+
+    return results;
+  } catch (error) {
+    logger.error('Error during selective metrics calculation:', error);
+    throw error;
+  }
+};
--- a/src/core/metrics/calculateMetrics.ts
+++ b/src/core/metrics/calculateMetrics.ts
@@ -2,7 +2,7 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { ProcessedFile } from '../file/fileTypes.js';
 import type { GitDiffResult } from '../git/gitDiffHandle.js';
-import { calculateAllFileMetrics } from './calculateAllFileMetrics.js';
+import { calculateAllFileMetrics, calculateSelectiveFileMetrics } from './calculateAllFileMetrics.js';
 import { calculateOutputMetrics } from './calculateOutputMetrics.js';

 export interface CalculateMetricsResult {
@@ -24,6 +24,7 @@ export const calculateMetrics = async (
  gitDiffResult: GitDiffResult | undefined,
  deps = {
    calculateAllFileMetrics,
+    calculateSelectiveFileMetrics,
    calculateOutputMetrics,
  },
 ): Promise<CalculateMetricsResult> => {
@@ -46,18 +47,34 @@ export const calculateMetrics = async (
    tokenCounter.free();
  }

-  const [fileMetrics, totalTokens] = await Promise.all([
-    deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback),
+  // For top files display optimization: calculate token counts only for top files by character count
+  const topFilesLength = config.output.topFilesLength;
+  const candidateFilesCount = Math.min(processedFiles.length, Math.max(topFilesLength * 10, topFilesLength));
+
+  // Get top files by character count first
+  const topFilesByChar = [...processedFiles]
+    .sort((a, b) => b.content.length - a.content.length)
+    .slice(0, candidateFilesCount);
+
+  const topFilePaths = topFilesByChar.map((file) => file.path);
+
+  const [selectiveFileMetrics, totalTokens] = await Promise.all([
+    deps.calculateSelectiveFileMetrics(processedFiles, topFilePaths, config.tokenCount.encoding, progressCallback),
    deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath),
  ]);

  const totalFiles = processedFiles.length;
  const totalCharacters = output.length;

+  // Build character counts for all files
  const fileCharCounts: Record<string, number> = {};
+  for (const file of processedFiles) {
+    fileCharCounts[file.path] = file.content.length;
+  }
+
+  // Build token counts only for top files
  const fileTokenCounts: Record<string, number> = {};
-  for (const file of fileMetrics) {
-    fileCharCounts[file.path] = file.charCount;
+  for (const file of selectiveFileMetrics) {
    fileTokenCounts[file.path] = file.tokenCount;
  }

--- a/tests/cli/cliPrint.test.ts
+++ b/tests/cli/cliPrint.test.ts
@@ -135,7 +135,7 @@ describe('cliPrint', () => {
        'README.md': 400,
      };

-      printTopFiles(fileCharCounts, fileTokenCounts, 2);
+      printTopFiles(fileCharCounts, fileTokenCounts, 2, 60);

      expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 2 Files'));
      expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('README.md'));
@@ -144,7 +144,7 @@ describe('cliPrint', () => {
    });

    test('should handle empty file list', () => {
-      printTopFiles({}, {}, 5);
+      printTopFiles({}, {}, 5, 0);

      expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Top 5 Files'));
    });
--- a/tests/core/metrics/calculateAllFileMetrics.test.ts
+++ b/tests/core/metrics/calculateAllFileMetrics.test.ts
@@ -1,6 +1,9 @@
 import { describe, expect, it, vi } from 'vitest';
 import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
-import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
+import {
+  calculateAllFileMetrics,
+  calculateSelectiveFileMetrics,
+} from '../../../src/core/metrics/calculateAllFileMetrics.js';
 import type { FileMetricsTask } from '../../../src/core/metrics/workers/fileMetricsWorker.js';
 import fileMetricsWorker from '../../../src/core/metrics/workers/fileMetricsWorker.js';
 import type { RepomixProgressCallback } from '../../../src/shared/types.js';
@@ -32,4 +35,47 @@ describe('calculateAllFileMetrics', () => {
      { path: 'file2.txt', charCount: 200, tokenCount: 50 },
    ]);
  });
+
+  it('should calculate metrics for selective files only', async () => {
+    const processedFiles: ProcessedFile[] = [
+      { path: 'file1.txt', content: 'a'.repeat(100) },
+      { path: 'file2.txt', content: 'b'.repeat(200) },
+      { path: 'file3.txt', content: 'c'.repeat(300) },
+    ];
+    const targetFilePaths = ['file1.txt', 'file3.txt'];
+    const progressCallback: RepomixProgressCallback = vi.fn();
+
+    const result = await calculateSelectiveFileMetrics(
+      processedFiles,
+      targetFilePaths,
+      'o200k_base',
+      progressCallback,
+      {
+        initTaskRunner: mockInitTaskRunner,
+      },
+    );
+
+    expect(result).toEqual([
+      { path: 'file1.txt', charCount: 100, tokenCount: 13 },
+      { path: 'file3.txt', charCount: 300, tokenCount: 75 },
+    ]);
+  });
+
+  it('should return empty array when no target files match', async () => {
+    const processedFiles: ProcessedFile[] = [{ path: 'file1.txt', content: 'a'.repeat(100) }];
+    const targetFilePaths = ['nonexistent.txt'];
+    const progressCallback: RepomixProgressCallback = vi.fn();
+
+    const result = await calculateSelectiveFileMetrics(
+      processedFiles,
+      targetFilePaths,
+      'o200k_base',
+      progressCallback,
+      {
+        initTaskRunner: mockInitTaskRunner,
+      },
+    );
+
+    expect(result).toEqual([]);
+  });
 });
--- a/tests/core/metrics/calculateMetrics.test.ts
+++ b/tests/core/metrics/calculateMetrics.test.ts
@@ -2,7 +2,10 @@ import { type Mock, describe, expect, it, vi } from 'vitest';
 import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
 import type { GitDiffResult } from '../../../src/core/git/gitDiffHandle.js';
 import { TokenCounter } from '../../../src/core/metrics/TokenCounter.js';
-import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
+import {
+  calculateAllFileMetrics,
+  calculateSelectiveFileMetrics,
+} from '../../../src/core/metrics/calculateAllFileMetrics.js';
 import { calculateMetrics } from '../../../src/core/metrics/calculateMetrics.js';
 import type { RepomixProgressCallback } from '../../../src/shared/types.js';
 import { createMockConfig } from '../../testing/testUtils.js';
@@ -16,7 +19,10 @@ vi.mock('../../../src/core/metrics/TokenCounter.js', () => {
  };
 });
 vi.mock('../../../src/core/metrics/aggregateMetrics.js');
-vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js');
+vi.mock('../../../src/core/metrics/calculateAllFileMetrics.js', () => ({
+  calculateAllFileMetrics: vi.fn(),
+  calculateSelectiveFileMetrics: vi.fn(),
+}));

 describe('calculateMetrics', () => {
  it('should calculate metrics and return the result', async () => {
@@ -31,7 +37,7 @@ describe('calculateMetrics', () => {
      { path: 'file1.txt', charCount: 100, tokenCount: 10 },
      { path: 'file2.txt', charCount: 200, tokenCount: 20 },
    ];
-    (calculateAllFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);
+    (calculateSelectiveFileMetrics as unknown as Mock).mockResolvedValue(fileMetrics);

    const aggregatedResult = {
      totalFiles: 2,
@@ -54,11 +60,17 @@ describe('calculateMetrics', () => {

    const result = await calculateMetrics(processedFiles, output, progressCallback, config, gitDiffResult, {
      calculateAllFileMetrics,
+      calculateSelectiveFileMetrics,
      calculateOutputMetrics: () => Promise.resolve(30),
    });

    expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...');
-    expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, 'o200k_base', progressCallback);
+    expect(calculateSelectiveFileMetrics).toHaveBeenCalledWith(
+      processedFiles,
+      ['file2.txt', 'file1.txt'], // sorted by character count desc
+      'o200k_base',
+      progressCallback,
+    );
    expect(result).toEqual(aggregatedResult);
  });
 });
--- a/tests/core/metrics/diffTokenCount.test.ts
+++ b/tests/core/metrics/diffTokenCount.test.ts
@@ -108,6 +108,7 @@ index 123..456 100644
      },
      {
        calculateAllFileMetrics: mockCalculateAllFileMetrics,
+        calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
        calculateOutputMetrics: mockCalculateOutputMetrics,
      },
    );
@@ -190,6 +191,7 @@ index 123..456 100644
      undefined, // No diff content
      {
        calculateAllFileMetrics: mockCalculateAllFileMetrics,
+        calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
        calculateOutputMetrics: mockCalculateOutputMetrics,
      },
    );
@@ -267,6 +269,7 @@ index 123..456 100644
      undefined, // No diff content
      {
        calculateAllFileMetrics: mockCalculateAllFileMetrics,
+        calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]),
        calculateOutputMetrics: mockCalculateOutputMetrics,
      },
    );
--- a/website/client/components/Home/TryItResultContent.vue
+++ b/website/client/components/Home/TryItResultContent.vue
@@ -186,10 +186,10 @@ onUnmounted(() => {
        <dl v-if="result.metadata.summary">
          <dt>Total Files</dt>
          <dd>{{ result.metadata.summary.totalFiles.toLocaleString() }} <span class="unit">files</span></dd>
-          <dt>Total Size</dt>
-          <dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
          <dt>Total Tokens</dt>
          <dd>{{ result.metadata.summary.totalTokens.toLocaleString() }} <span class="unit">tokens</span></dd>
+          <dt>Total Size</dt>
+          <dd>{{ result.metadata.summary.totalCharacters.toLocaleString() }} <span class="unit">chars</span></dd>
        </dl>
      </div>

@@ -199,7 +199,7 @@ onUnmounted(() => {
          <li v-for="file in result.metadata.topFiles" :key="file.path">
            <div class="file-path">{{ file.path }}</div>
            <div class="file-stats">
-              {{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
+              {{ file.tokenCount.toLocaleString() }} <span class="unit">tokens</span> <span class="separator-unit">|</span> {{ file.charCount.toLocaleString() }} <span class="unit">chars</span> <span class="separator-unit">|</span> {{ ((file.tokenCount / result.metadata.summary.totalTokens) * 100).toFixed(1) }}<span class="unit">%</span>
            </div>
          </li>
        </ol>