count lines of code

This commit is contained in:
Alex Cheema
2024-11-25 13:24:28 +04:00
parent 2c5d05538c
commit c0646be2a1
2 changed files with 267 additions and 0 deletions

View File

@@ -269,11 +269,68 @@ jobs:
path: ./pipsize.json
destination: pip-sizes.json
# Job: runs extra/line_counter.py over the checked-out code and uploads the
# JSON reports it writes. For pull-request builds the PR's target branch is
# cloned first so the script can compare the two trees.
check_line_count:
docker:
- image: cimg/python:3.10
steps:
- checkout
# PR builds only ($CIRCLE_PULL_REQUEST is non-empty): ask the CircleCI API for
# the pipeline's target branch and clone that branch into ./base_branch.
# NOTE(review): PR_NUMBER is computed here but not used by any visible step.
# NOTE(review): assumes jq is available in the image and that the API response
# carries a `target_branch` field; otherwise `git clone -b` receives "null" or
# an empty name — confirm.
- run:
name: Setup git for PR comparison
command: |
if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
"https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
| jq -r '.target_branch')
git clone -b $BASE_BRANCH --single-branch \
https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
base_branch
fi
- run:
name: Install dependencies
command: |
python -m pip install --upgrade pip
pip install tabulate
# Two arguments -> the script diffs base_branch against the current checkout;
# one argument -> it takes a snapshot of the current checkout only.
- run:
name: Run line count check
command: |
if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
python extra/line_counter.py base_branch .
else
python extra/line_counter.py .
fi
# The script writes line-count-diff.json in comparison mode and
# line-count-snapshot.json in single-directory mode, so at most one of the two
# artifact paths below exists in a given run.
- store_artifacts:
path: line-count-snapshot.json
destination: line-count-snapshot.json
- store_artifacts:
path: line-count-diff.json
destination: line-count-diff.json
# NOTE(review): the cp glob fails when the script produced no JSON file at all
# — confirm whether that case can occur and should fail the job.
- run:
name: Create test results directory
command: |
mkdir -p test-results/line-count
cp line-count-*.json test-results/line-count/
# NOTE(review): store_test_results is normally fed JUnit-style reports; confirm
# that uploading these JSON files here has the intended effect.
- store_test_results:
path: test-results
workflows:
version: 2
build_and_test:
jobs:
- check_line_count:
filters:
branches:
only: /.*/
tags:
only: /.*/
- unit_test
- discovery_integration_test
- chatgpt_api_integration_test_mlx

210
extra/line_counter.py Normal file
View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
import os
import sys
import json
import token
import tokenize
from datetime import datetime, timezone
# Token types that gen_stats counts as code. Tokens of any other type
# (e.g. comments, newlines, indentation) are filtered out before counting.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
def is_docstring(t):
    """Return True when token *t* looks like a docstring.

    A token qualifies when it is a STRING token that opens with a triple
    quote (either ``\"\"\"`` or ``'''``) and that triple quote is also the
    first non-blank text on the token's source line, i.e. the string is a
    statement of its own rather than part of an assignment or call.

    Args:
        t: A ``tokenize.TokenInfo`` (needs ``type``, ``string`` and ``line``).

    Returns:
        bool: True for stand-alone triple-quoted strings, False otherwise.
    """
    if t.type != token.STRING:
        return False
    line_start = t.line.strip()
    # Both quote styles are legal docstring delimiters; require the same
    # style on the token and at the start of the line.
    return any(
        t.string.startswith(quote) and line_start.startswith(quote)
        for quote in ('"""', "'''")
    )
def gen_stats(base_path="."):
    """Collect line and token statistics for every ``.py`` file under ``<base_path>/exo``.

    Only tokens whose type is in ``TOKEN_WHITELIST`` and that are not
    recognised by ``is_docstring`` are counted. A file's line count is the
    number of distinct source lines spanned by those tokens.

    Args:
        base_path: Directory that contains the ``exo`` package directory.

    Returns:
        list: One ``[relative_path, line_count, tokens_per_line]`` row per
        file with at least one counted line. ``relative_path`` is relative
        to ``base_path`` and uses forward slashes. The list is empty when
        the ``exo`` directory does not exist (a warning is printed).
    """
    exo_path = os.path.join(base_path, "exo")
    if not os.path.exists(exo_path):
        print(f"Warning: {exo_path} directory not found")
        return []

    stats = []
    for dirpath, _dirnames, filenames in os.walk(exo_path):
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            filepath = os.path.join(dirpath, filename)
            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
            try:
                with tokenize.open(filepath) as source:
                    counted = [
                        tok for tok in tokenize.generate_tokens(source.readline)
                        if tok.type in TOKEN_WHITELIST and not is_docstring(tok)
                    ]
                # A multi-line token contributes every line it spans.
                covered_lines = set()
                for tok in counted:
                    covered_lines.update(range(tok.start[0], tok.end[0] + 1))
                line_count = len(covered_lines)
                if line_count > 0:
                    stats.append([relfilepath, line_count, len(counted) / line_count])
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error processing {filepath}: {e}")
    return stats
def gen_diff(table_old, table_new):
    """Compare two statistics tables and describe what changed per file.

    Args:
        table_old: Rows of ``[path, line_count, tokens_per_line]`` for the
            baseline tree.
        table_new: Rows in the same shape for the current tree.

    Returns:
        list: Rows of ``[path, current_lines, line_diff,
        current_tokens_per_line, tokens_per_line_diff]``. Added files come
        first, then deleted files (reported with zero current values and
        negative diffs), then files whose line count or tokens-per-line
        changed. Each group is sorted by path so the result is
        deterministic. Files with identical statistics are omitted.
    """
    # Index rows by path once instead of scanning the tables for every file.
    # setdefault keeps the first row for a path, as a linear scan would.
    old_by_file = {}
    for row in table_old:
        old_by_file.setdefault(row[0], row)
    new_by_file = {}
    for row in table_new:
        new_by_file.setdefault(row[0], row)

    table = []
    for name in sorted(new_by_file.keys() - old_by_file.keys()):
        new = new_by_file[name]
        table.append([new[0], new[1], new[1], new[2], new[2]])
    for name in sorted(old_by_file.keys() - new_by_file.keys()):
        old = old_by_file[name]
        table.append([old[0], 0, -old[1], 0, -old[2]])
    for name in sorted(new_by_file.keys() & old_by_file.keys()):
        old = old_by_file[name]
        new = new_by_file[name]
        if new[1] != old[1] or new[2] != old[2]:
            table.append([new[0], new[1], new[1] - old[1], new[2], new[2] - old[2]])
    return table
def create_json_report(table, is_diff=False):
    """Build the JSON-serialisable report for a statistics or diff table.

    Build metadata is read from the ``CIRCLE_SHA1``, ``CIRCLE_BRANCH`` and
    (diff reports only) ``CIRCLE_PR_NUMBER`` environment variables.

    Args:
        table: Rows from ``gen_stats`` (snapshot) or ``gen_diff`` (diff).
        is_diff: True to build a ``'diff'`` report, False for ``'snapshot'``.

    Returns:
        dict: Report with type, UTC timestamp, commit/branch info, the
        per-file entries and the aggregate totals.
    """
    report = {
        'type': 'diff' if is_diff else 'snapshot',
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'commit_sha': os.environ.get('CIRCLE_SHA1', 'unknown'),
        'branch': os.environ.get('CIRCLE_BRANCH', 'unknown'),
    }

    entries = []
    if is_diff:
        report['pr_number'] = os.environ.get('CIRCLE_PR_NUMBER', '')
        for row in table:
            entries.append({
                'name': row[0],
                'current_lines': row[1],
                'line_diff': row[2],
                'current_tokens_per_line': row[3],
                'tokens_per_line_diff': row[4],
            })
        report['files'] = entries
        report['total_line_changes'] = sum(row[2] for row in table)
        report['total_files_changed'] = len(entries)
        return report

    for row in table:
        entries.append({
            'name': row[0],
            'lines': row[1],
            'tokens_per_line': row[2],
        })
    report['files'] = entries
    report['total_lines'] = sum(row[1] for row in table)
    report['total_files'] = len(entries)
    return report
def display_diff(diff):
    """Return *diff* as text, with an explicit ``+`` sign for positive values."""
    if diff > 0:
        return f"+{diff}"
    return str(diff)
def _format_cell(value, col_idx, floatfmt):
    """Render one table cell as text according to its column's format hint."""
    if isinstance(value, float):
        return f"{value:+.1f}" if floatfmt[col_idx].startswith('+') else f"{value:.1f}"
    # Column 0 holds the file name and is never number-formatted.
    if isinstance(value, int) and col_idx > 0:
        return f"{value:+d}" if floatfmt[col_idx].startswith('+') else f"{value:d}"
    return str(value)


def format_table(rows, headers, floatfmt):
    """Render *rows* as a plain-text table with a dashed line under the header.

    Args:
        rows: List of rows; each row supplies at least ``len(headers)`` cells.
        headers: List of column titles.
        floatfmt: One format hint per column. Only a leading ``+`` is
            significant: it forces an explicit sign on numeric cells.
            Floats are always shown with one decimal place.

    Returns:
        str: The table text, or ``""`` when *rows* is empty. Cells are
        left-justified and separated by a single space.
    """
    if not rows:
        return ""

    n_cols = len(headers)
    # Format first, measure afterwards: the rendered text can be longer
    # (explicit "+" sign) or shorter (rounded float) than str(value), so
    # measuring raw values would misalign the columns.
    rendered = [
        [_format_cell(row[col], col, floatfmt) for col in range(n_cols)]
        for row in [headers] + rows
    ]
    col_widths = [max(len(cells[col]) for cells in rendered) for col in range(n_cols)]

    output = []
    for row_idx, cells in enumerate(rendered):
        output.append(" ".join(cell.ljust(width) for cell, width in zip(cells, col_widths)))
        if row_idx == 0:
            # Separator line directly below the header row.
            output.append(" ".join("-" * width for width in col_widths))
    return "\n".join(output)
# Script entry point.
#   line_counter.py OLD_DIR NEW_DIR  -> print a per-file diff table and write
#                                       line-count-diff.json
#   line_counter.py [DIR]            -> print a snapshot table for DIR
#                                       (default ".") and write
#                                       line-count-snapshot.json
# Any argument count other than exactly two paths takes the snapshot branch.
# NOTE(review): confirm whether the JSON report should also be written when the
# table is empty; the CI job copies line-count-*.json unconditionally.
if __name__ == "__main__":
if len(sys.argv) == 3:
# Comparing two directories
headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
if table:
# Print table output
print("### Code Changes in 'exo' Directory")
print("```")
# Largest absolute line change first.
print(format_table(
sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
headers,
(".1f", "d", "+d", ".1f", "+.1f")
))
total_changes = sum(row[2] for row in table)
print(f"\nTotal line changes: {display_diff(total_changes)}")
print("```")
# Generate JSON report
report = create_json_report(table, is_diff=True)
with open('line-count-diff.json', 'w') as f:
json.dump(report, f, indent=2)
else:
# Single directory analysis
headers = ["File", "Lines", "Tokens/Line"]
table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
if table:
# Print table output
print("### Code Statistics for 'exo' Directory")
print("```")
# Largest files first.
print(format_table(
sorted(table, key=lambda x: x[1], reverse=True),
headers,
(".1f", "d", ".1f")
))
total_lines = sum(row[1] for row in table)
print(f"\nTotal lines: {total_lines}")
print("```")
# Generate JSON report
report = create_json_report(table, is_diff=False)
with open('line-count-snapshot.json', 'w') as f:
json.dump(report, f, indent=2)