count lines of code

2025-10-23 02:57:14 +03:00 · 2024-11-25 13:24:28 +04:00
parent 2c5d05538c
commit c0646be2a1
2 changed files with 267 additions and 0 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -269,11 +269,68 @@ jobs:
          path: ./pipsize.json
          destination: pip-sizes.json

+  check_line_count:
+    docker:
+      - image: cimg/python:3.10
+    steps:
+      - checkout
+      
+      - run:
+          name: Setup git for PR comparison
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
+              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
+                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
+                | jq -r '.target_branch')
+              
+              git clone -b $BASE_BRANCH --single-branch \
+                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
+                base_branch
+            fi
+      
+      - run:
+          name: Install dependencies
+          command: |
+            python -m pip install --upgrade pip
+            pip install tabulate
+      
+      - run:
+          name: Run line count check
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              python extra/line_counter.py base_branch .
+            else
+              python extra/line_counter.py .
+            fi
+      
+      - store_artifacts:
+          path: line-count-snapshot.json
+          destination: line-count-snapshot.json
+      
+      - store_artifacts:
+          path: line-count-diff.json
+          destination: line-count-diff.json
+      
+      - run:
+          name: Create test results directory
+          command: |
+            mkdir -p test-results/line-count
+            cp line-count-*.json test-results/line-count/
+      
+      - store_test_results:
+          path: test-results

 workflows:
  version: 2
  build_and_test:
    jobs:
+      - check_line_count:
+          filters:
+            branches:
+              only: /.*/
+            tags:
+              only: /.*/
      - unit_test
      - discovery_integration_test
      - chatgpt_api_integration_test_mlx
--- a/extra/line_counter.py
+++ b/extra/line_counter.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import token
+import tokenize
+from datetime import datetime, timezone
+
+TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
+
+def is_docstring(t):
+    return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
+def gen_stats(base_path="."):
+    table = []
+    exo_path = os.path.join(base_path, "exo")
+    if not os.path.exists(exo_path):
+        print(f"Warning: {exo_path} directory not found")
+        return table
+
+    for path, _, files in os.walk(exo_path):
+        for name in files:
+            if not name.endswith(".py"):
+                continue
+            
+            filepath = os.path.join(path, name)
+            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
+            
+            try:
+                with tokenize.open(filepath) as file_:
+                    tokens = [t for t in tokenize.generate_tokens(file_.readline) 
+                            if t.type in TOKEN_WHITELIST and not is_docstring(t)]
+                    token_count = len(tokens)
+                    line_count = len(set([x for t in tokens 
+                                        for x in range(t.start[0], t.end[0]+1)]))
+                    if line_count > 0:
+                        table.append([relfilepath, line_count, token_count/line_count])
+            except Exception as e:
+                print(f"Error processing {filepath}: {e}")
+                continue
+                
+    return table
+
+def gen_diff(table_old, table_new):
+    table = []
+    files_new = set([x[0] for x in table_new])
+    files_old = set([x[0] for x in table_old])
+    
+    added = files_new - files_old
+    deleted = files_old - files_new
+    unchanged = files_new & files_old
+    
+    for file in added:
+        file_stat = [stats for stats in table_new if file in stats][0]
+        table.append([file_stat[0], file_stat[1], file_stat[1], file_stat[2], file_stat[2]])
+    
+    for file in deleted:
+        file_stat = [stats for stats in table_old if file in stats][0]
+        table.append([file_stat[0], 0, -file_stat[1], 0, -file_stat[2]])
+    
+    for file in unchanged:
+        file_stat_old = [stats for stats in table_old if file in stats][0]
+        file_stat_new = [stats for stats in table_new if file in stats][0]
+        if file_stat_new[1] != file_stat_old[1] or file_stat_new[2] != file_stat_old[2]:
+            table.append([
+                file_stat_new[0],
+                file_stat_new[1],
+                file_stat_new[1] - file_stat_old[1],
+                file_stat_new[2],
+                file_stat_new[2] - file_stat_old[2]
+            ])
+    
+    return table
+
+def create_json_report(table, is_diff=False):
+    timestamp = datetime.now(timezone.utc).isoformat()
+    commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown')
+    branch = os.environ.get('CIRCLE_BRANCH', 'unknown')
+    pr_number = os.environ.get('CIRCLE_PR_NUMBER', '')
+    
+    if is_diff:
+        files = [{
+            'name': row[0],
+            'current_lines': row[1],
+            'line_diff': row[2],
+            'current_tokens_per_line': row[3],
+            'tokens_per_line_diff': row[4]
+        } for row in table]
+        
+        report = {
+            'type': 'diff',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'pr_number': pr_number,
+            'files': files,
+            'total_line_changes': sum(row[2] for row in table),
+            'total_files_changed': len(files)
+        }
+    else:
+        files = [{
+            'name': row[0],
+            'lines': row[1],
+            'tokens_per_line': row[2]
+        } for row in table]
+        
+        report = {
+            'type': 'snapshot',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'files': files,
+            'total_lines': sum(row[1] for row in table),
+            'total_files': len(files)
+        }
+    
+    return report
+
+def display_diff(diff):
+    return "+" + str(diff) if diff > 0 else str(diff)
+
+def format_table(rows, headers, floatfmt):
+    if not rows:
+        return ""
+    
+    # Add headers as first row
+    all_rows = [headers] + rows
+    
+    # Calculate column widths
+    col_widths = []
+    for col in range(len(headers)):
+        col_width = max(len(str(row[col])) for row in all_rows)
+        col_widths.append(col_width)
+    
+    # Format rows
+    output = []
+    for row_idx, row in enumerate(all_rows):
+        formatted_cols = []
+        for col_idx, (value, width) in enumerate(zip(row, col_widths)):
+            if isinstance(value, float):
+                # Handle float formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+.1f}"
+                else:
+                    value = f"{value:.1f}"
+            elif isinstance(value, int) and col_idx > 0:  # Skip filename column
+                # Handle integer formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+d}"
+                else:
+                    value = f"{value:d}"
+            formatted_cols.append(str(value).ljust(width))
+        output.append("  ".join(formatted_cols))
+        
+        # Add separator line after headers
+        if row_idx == 0:
+            separator = []
+            for width in col_widths:
+                separator.append("-" * width)
+            output.append("  ".join(separator))
+    
+    return "\n".join(output)
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3:
+        # Comparing two directories
+        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
+        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
+        
+        if table:
+            # Print table output
+            print("### Code Changes in 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
+                headers,
+                (".1f", "d", "+d", ".1f", "+.1f")
+            ))
+            total_changes = sum(row[2] for row in table)
+            print(f"\nTotal line changes: {display_diff(total_changes)}")
+            print("```")
+            
+            # Generate JSON report
+            report = create_json_report(table, is_diff=True)
+            with open('line-count-diff.json', 'w') as f:
+                json.dump(report, f, indent=2)
+    else:
+        # Single directory analysis
+        headers = ["File", "Lines", "Tokens/Line"]
+        table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
+        
+        if table:
+            # Print table output
+            print("### Code Statistics for 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: x[1], reverse=True),
+                headers,
+                (".1f", "d", ".1f")
+            ))
+            total_lines = sum(row[1] for row in table)
+            print(f"\nTotal lines: {total_lines}")
+            print("```")
+            
+            # Generate JSON report
+            report = create_json_report(table, is_diff=False)
+            with open('line-count-snapshot.json', 'w') as f:
+                json.dump(report, f, indent=2)