#!/usr/bin/env python3
"""Generate aggregated contributor leaderboard data."""
import json
import os
from pathlib import Path
from collections import defaultdict
from datetime import datetime
import yaml

from github_utils import fetch_profile
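
# Input layout, as inferred from the processing below: each _reviewers/<slug>.json
# holds a list of items shaped roughly like
#   {"repo_full_name": "owner/repo",
#    "discussion_comments": [{"comment_author": "...",
#                             "comment_created_at": "2024-01-01T00:00:00Z",
#                             "comment_body": "..."}]}
# and a matching _reviewers/<slug>.md may carry YAML front matter with a title.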


def parse_front_matter(md_path):
    """Return the YAML front matter of md_path as a dict, or {} if absent."""
    try:
        text = md_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        return {}
    if not text.startswith('---'):
        return {}
    # Front matter sits between the first two '---' delimiters.
    parts = text.split('---', 2)
    if len(parts) >= 3:
        return yaml.safe_load(parts[1]) or {}
    return {}
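
# For example, a _reviewers/<slug>.md beginning with
#   ---
#   title: Some reviewer title
#   ---
# parses to {'title': 'Some reviewer title'} (the title text is illustrative).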


def main():
    reviewers_dir = Path('_reviewers')
    # Aggregate per-author activity across every reviewer entry.
    users = defaultdict(lambda: {
        'reviewers': set(),
        'repos': set(),
        'last': None,
        'entry_titles': {},
        'comments': defaultdict(list)
    })
    for json_path in reviewers_dir.glob('*.json'):
        slug = json_path.stem
        meta = parse_front_matter(reviewers_dir / f'{slug}.md')
        title = meta.get('title', slug)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for item in data:
            repo = item.get('repo_full_name')
            for c in item.get('discussion_comments', []):
                author = c.get('comment_author')
                ts = c.get('comment_created_at')
                text = c.get('comment_body')
                if not author or not ts:
                    continue
                # Skip automated accounts.
                if author == 'Copilot' or '[bot]' in author:
                    continue
                info = users[author]
                info['reviewers'].add(slug)
                info['entry_titles'][slug] = title
                if repo:
                    info['repos'].add(repo)
                if text:
                    info['comments'][slug].append(text)
                # fromisoformat rejects a bare 'Z' suffix before Python 3.11.
                dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
                if info['last'] is None or dt > info['last']:
                    info['last'] = dt
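
    # After the loop, each users[author] record looks like (illustrative values):
    #   {'reviewers': {'<slug>', ...}, 'repos': {'owner/repo', ...},
    #    'last': datetime(...), 'entry_titles': {'<slug>': '<title>'},
    #    'comments': {'<slug>': ['comment body', ...]}}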

    output = []
    for user, d in users.items():
        output.append({
            'name': user,
            'reviewers_count': len(d['reviewers']),
            'repos_count': len(d['repos']),
            'last_contribution': d['last'].isoformat() if d['last'] else None
        })
    output.sort(key=lambda x: x['reviewers_count'], reverse=True)
    # Keep only the top 100 contributors to keep the dataset small
    output = output[:100]

    top_users = {u['name'] for u in output}

    # Load existing contributor data to reuse cached profiles
    assets_dir = Path('assets/data')
    assets_dir.mkdir(parents=True, exist_ok=True)
    existing = {}
    existing_path = assets_dir / 'contributors.json'
    if existing_path.exists():
        try:
            with open(existing_path, 'r', encoding='utf-8') as f:
                existing = json.load(f)
        except Exception:
            # Unreadable or corrupt cache: start fresh.
            existing = {}

    token = os.getenv('GITHUB_TOKEN')
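
    # fetch_profile comes from this repo's github_utils helper; judging from its
    # use below, it is assumed to return a profile dict, or None when the lookup
    # fails.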

    contributors = {}
    for user in top_users:
        d = users[user]
        entries = [
            {'slug': s, 'title': d['entry_titles'][s]}
            for s in sorted(d['entry_titles'])
        ]
        info = {
            'repos': sorted(d['repos']),
            'entries': entries,
            # Materialize the defaultdict as a plain dict for serialization.
            'comments': {k: v for k, v in d['comments'].items()}
        }
        # Reuse a cached profile when available; otherwise hit the GitHub API.
        profile = existing.get(user, {}).get('profile')
        if profile is None:
            profile = fetch_profile(user, token)
        if profile is not None:
            info['profile'] = profile
        contributors[user] = info
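
    # Each contributors.json entry therefore looks roughly like:
    #   "<login>": {"repos": [...], "entries": [{"slug": ..., "title": ...}],
    #               "comments": {"<slug>": [...]}, "profile": {...}}  # profile optional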

    data_dir = Path('_data')
    data_dir.mkdir(exist_ok=True)
    with open(data_dir / 'leaderboard.json', 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)
    print(f'Wrote {len(output)} contributors to _data/leaderboard.json')

    with open(assets_dir / 'contributors.json', 'w', encoding='utf-8') as f:
        json.dump(contributors, f, indent=2, ensure_ascii=False)
    print(f'Wrote {len(contributors)} users to assets/data/contributors.json')
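

# Typical invocation, from the repository root so _reviewers/ is found
# (requires PyYAML and the sibling github_utils module; GITHUB_TOKEN is
# optional but avoids unauthenticated API rate limits):
#   GITHUB_TOKEN=<token> python3 <path-to-this-script>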


if __name__ == '__main__':
    main()