From 09a020096a2564a3c3d06beb374368b5eb21be33 Mon Sep 17 00:00:00 2001
From: Adam Wolff
Date: Wed, 24 Sep 2025 18:01:05 -0700
Subject: [PATCH] Restore dedupe workflows

This reverts commit d81efef32491c20fc36a0a25a8f33d1a26c3ec78.
---
 .github/workflows/auto-close-duplicates.yml   |  31 ++
 .../workflows/backfill-duplicate-comments.yml |  44 +++
 .github/workflows/claude-dedupe-issues.yml    |  81 ++++
 scripts/auto-close-duplicates.ts              | 372 ++++++++++++++++++
 scripts/backfill-duplicate-comments.ts        | 317 +++++++++++++++
 5 files changed, 845 insertions(+)
 create mode 100644 .github/workflows/auto-close-duplicates.yml
 create mode 100644 .github/workflows/backfill-duplicate-comments.yml
 create mode 100644 .github/workflows/claude-dedupe-issues.yml
 create mode 100644 scripts/auto-close-duplicates.ts
 create mode 100644 scripts/backfill-duplicate-comments.ts

diff --git a/.github/workflows/auto-close-duplicates.yml b/.github/workflows/auto-close-duplicates.yml
new file mode 100644
index 0000000..b6ca056
--- /dev/null
+++ b/.github/workflows/auto-close-duplicates.yml
@@ -0,0 +1,31 @@
+name: Auto-close duplicate issues
+description: Auto-closes issues that are duplicates of existing issues
+on:
+  schedule:
+    - cron: "0 9 * * *"  # every day at 09:00 UTC
+  workflow_dispatch:
+
+jobs:
+  auto-close-duplicates:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      issues: write  # the script labels, closes and comments on issues
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Auto-close duplicate issues
+        run: bun run scripts/auto-close-duplicates.ts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
+          GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
+          STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}  # NOTE(review): the script added by this patch never reads this
diff --git a/.github/workflows/backfill-duplicate-comments.yml b/.github/workflows/backfill-duplicate-comments.yml
new file mode 100644
index 0000000..acce8f9
--- /dev/null
+++ b/.github/workflows/backfill-duplicate-comments.yml
@@ -0,0 +1,44 @@
+name: Backfill Duplicate Comments
+description: Triggers duplicate detection for old issues that don't have duplicate comments
+
+on:
+  workflow_dispatch:
+    inputs:
+      days_back:
+        description: 'How many days back to look for old issues'
+        required: false
+        default: '90'
+        type: string
+      dry_run:
+        description: 'Dry run mode (true to only log what would be done)'
+        required: false
+        default: 'true'
+        type: choice
+        options:
+          - 'true'
+          - 'false'
+
+jobs:
+  backfill-duplicate-comments:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+      issues: read
+      actions: write  # needed to dispatch claude-dedupe-issues.yml
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Backfill duplicate comments
+        run: bun run scripts/backfill-duplicate-comments.ts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DAYS_BACK: ${{ inputs.days_back }}  # NOTE(review): not read by the script, which filters by MIN_ISSUE_NUMBER/MAX_ISSUE_NUMBER
+          DRY_RUN: ${{ inputs.dry_run }}
\ No newline at end of file
diff --git a/.github/workflows/claude-dedupe-issues.yml b/.github/workflows/claude-dedupe-issues.yml
new file mode 100644
index 0000000..9776b60
--- /dev/null
+++ b/.github/workflows/claude-dedupe-issues.yml
@@ -0,0 +1,81 @@
+name: Claude Issue Dedupe
+description: Automatically dedupe GitHub issues using Claude Code
+on:
+  issues:
+    types: [opened]
+  workflow_dispatch:
+    inputs:
+      issue_number:
+        description: 'Issue number to process for duplicate detection'
+        required: true
+        type: string
+
+jobs:
+  claude-dedupe-issues:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Run Claude Code slash command
+        uses: anthropics/claude-code-base-action@beta
+        with:
+          prompt: "/dedupe ${{ github.repository }}/issues/${{ github.event.issue.number || inputs.issue_number }}"
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          claude_env: |
+            GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Log duplicate comment event to Statsig
+        if: always()
+        env:
+          STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
+          # Passed through env rather than expanded inline in the script, so a
+          # manually supplied issue_number cannot be interpreted as shell code.
+          ISSUE_NUMBER: ${{ github.event.issue.number || inputs.issue_number }}
+          REPO: ${{ github.repository }}
+        run: |
+          if [ -z "$STATSIG_API_KEY" ]; then
+            echo "STATSIG_API_KEY not found, skipping Statsig logging"
+            exit 0
+          fi
+
+          # Prepare the event payload
+          EVENT_PAYLOAD=$(jq -n \
+            --arg issue_number "$ISSUE_NUMBER" \
+            --arg repo "$REPO" \
+            --arg triggered_by "${{ github.event_name }}" \
+            '{
+              events: [{
+                eventName: "github_duplicate_comment_added",
+                value: 1,
+                metadata: {
+                  repository: $repo,
+                  issue_number: ($issue_number | tonumber),
+                  triggered_by: $triggered_by,
+                  workflow_run_id: "${{ github.run_id }}"
+                },
+                time: (now | floor | tostring)
+              }]
+            }')
+
+          # Send to Statsig API
+          echo "Logging duplicate comment event to Statsig for issue #${ISSUE_NUMBER}"
+
+          RESPONSE=$(curl -s -w "\n%{http_code}" -X POST https://events.statsigapi.net/v1/log_event \
+            -H "Content-Type: application/json" \
+            -H "STATSIG-API-KEY: ${STATSIG_API_KEY}" \
+            -d "$EVENT_PAYLOAD")
+
+          HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
+          BODY=$(echo "$RESPONSE" | head -n-1)
+
+          if [ "$HTTP_CODE" -eq 200 ] || [ "$HTTP_CODE" -eq 202 ]; then
+            echo "Successfully logged duplicate comment event for issue #${ISSUE_NUMBER}"
+          else
+            echo "Failed to log duplicate comment event for issue #${ISSUE_NUMBER}. HTTP ${HTTP_CODE}: ${BODY}"
+          fi
diff --git a/scripts/auto-close-duplicates.ts b/scripts/auto-close-duplicates.ts
new file mode 100644
index 0000000..2ad3bd3
--- /dev/null
+++ b/scripts/auto-close-duplicates.ts
@@ -0,0 +1,372 @@
+#!/usr/bin/env bun
+
+/**
+ * Closes open issues that the dedupe bot flagged as possible duplicates once
+ * the flag has stood for three days with no later comments and no thumbs-down
+ * reaction from the issue author on the bot's comment.
+ */
+
+// Ambient declaration of the only `process` members this script touches.
+declare global {
+  var process: {
+    env: Record<string, string | undefined>;
+    exitCode?: number;
+  };
+}
+
+interface GitHubIssue {
+  number: number;
+  title: string;
+  user: { id: number } | null;
+  created_at: string;
+  // The issues listing also returns pull requests; only those carry this key.
+  pull_request?: unknown;
+}
+
+interface GitHubComment {
+  id: number;
+  body?: string;
+  created_at: string;
+  user: { type: string; id: number } | null;
+}
+
+interface GitHubReaction {
+  user: { id: number } | null;
+  content: string;
+}
+
+const PER_PAGE = 100;
+// Cap on pages fetched per listing so pagination always terminates.
+const MAX_PAGES = 20;
+const GRACE_PERIOD_DAYS = 3;
+
+/**
+ * Sends one authenticated request to the GitHub REST API.
+ *
+ * @param endpoint - Path beginning with "/", appended to https://api.github.com.
+ * @param token - Token sent as a Bearer credential.
+ * @param method - HTTP method; defaults to GET.
+ * @param body - Optional payload, JSON-encoded when present.
+ * @returns The parsed JSON response body, or null for a 204 response.
+ * @throws Error when the response status is not in the 2xx range.
+ */
+async function githubRequest(endpoint: string, token: string, method: string = 'GET', body?: any): Promise<any> {
+  const response = await fetch(`https://api.github.com${endpoint}`, {
+    method,
+    headers: {
+      Authorization: `Bearer ${token}`,
+      Accept: "application/vnd.github.v3+json",
+      "User-Agent": "auto-close-duplicates-script",
+      ...(body && { "Content-Type": "application/json" }),
+    },
+    ...(body && { body: JSON.stringify(body) }),
+  });
+
+  if (!response.ok) {
+    throw new Error(
+      `GitHub API request failed: ${method} ${endpoint} -> ${response.status} ${response.statusText}`
+    );
+  }
+
+  // 204 No Content carries no body to parse.
+  if (response.status === 204) {
+    return null;
+  }
+
+  return response.json();
+}
+
+/**
+ * Fetches successive pages of a list endpoint and returns the combined items.
+ * GitHub list endpoints return 30 items per page unless asked for more, so
+ * reading a single response can miss comments or reactions.
+ *
+ * @param endpoint - List endpoint path; may already carry a query string.
+ * @param token - Token forwarded to githubRequest.
+ * @param maxPages - Stop after this many pages even if more remain.
+ */
+async function githubRequestAllPages<T>(endpoint: string, token: string, maxPages: number = MAX_PAGES): Promise<T[]> {
+  const separator = endpoint.includes("?") ? "&" : "?";
+  const items: T[] = [];
+
+  for (let page = 1; page <= maxPages; page++) {
+    const pageItems: T[] = await githubRequest(
+      `${endpoint}${separator}per_page=${PER_PAGE}&page=${page}`,
+      token
+    );
+    items.push(...pageItems);
+
+    // A short page is the last one.
+    if (pageItems.length < PER_PAGE) break;
+  }
+
+  return items;
+}
+
+/**
+ * Extracts the issue number a duplicate-detection comment points at.
+ * A "#123" reference anywhere in the text wins over an issue URL.
+ *
+ * @param commentBody - Text of the bot comment.
+ * @returns The first referenced issue number, or null when none is found.
+ */
+function extractDuplicateIssueNumber(commentBody: string): number | null {
+  // Try to match #123 format first
+  let match = commentBody.match(/#(\d+)/);
+  if (match) {
+    return parseInt(match[1], 10);
+  }
+
+  // Try to match GitHub issue URL format: https://github.com/owner/repo/issues/123
+  match = commentBody.match(/github\.com\/[^\/]+\/[^\/]+\/issues\/(\d+)/);
+  if (match) {
+    return parseInt(match[1], 10);
+  }
+
+  return null;
+}
+
+/** Reports whether a comment is the bot's "Found ... possible duplicate" notice. */
+function isDuplicateDetectionComment(comment: GitHubComment): boolean {
+  const body = comment.body ?? "";
+  return (
+    body.includes("Found") &&
+    body.includes("possible duplicate") &&
+    comment.user?.type === "Bot"
+  );
+}
+
+/**
+ * Labels an issue "duplicate", closes it with state_reason "duplicate", and
+ * posts a comment naming the issue it duplicates.
+ *
+ * @param issueNumber - Issue to close.
+ * @param duplicateOfNumber - Issue named in the closing comment.
+ * @throws Error when any of the three API calls fails.
+ */
+async function closeIssueAsDuplicate(
+  owner: string,
+  repo: string,
+  issueNumber: number,
+  duplicateOfNumber: number,
+  token: string
+): Promise<void> {
+  // Add the label through the labels endpoint: sending `labels` in the PATCH
+  // below would replace every label already on the issue.
+  await githubRequest(
+    `/repos/${owner}/${repo}/issues/${issueNumber}/labels`,
+    token,
+    'POST',
+    { labels: ['duplicate'] }
+  );
+
+  await githubRequest(
+    `/repos/${owner}/${repo}/issues/${issueNumber}`,
+    token,
+    'PATCH',
+    {
+      state: 'closed',
+      state_reason: 'duplicate'
+    }
+  );
+
+  await githubRequest(
+    `/repos/${owner}/${repo}/issues/${issueNumber}/comments`,
+    token,
+    'POST',
+    {
+      body: `This issue has been automatically closed as a duplicate of #${duplicateOfNumber}.
+
+If this is incorrect, please re-open this issue or create a new one.
+
+🤖 Generated with [Claude Code](https://claude.ai/code)`
+    }
+  );
+}
+
+/**
+ * Entry point: scans open issues older than the grace period and closes those
+ * whose most recent duplicate-detection comment went unchallenged.
+ *
+ * @throws Error when GITHUB_TOKEN is unset or a listing request fails.
+ */
+async function autoCloseDuplicates(): Promise<void> {
+  console.log("[DEBUG] Starting auto-close duplicates script");
+
+  const token = process.env.GITHUB_TOKEN;
+  if (!token) {
+    throw new Error("GITHUB_TOKEN environment variable is required");
+  }
+  console.log("[DEBUG] GitHub token found");
+
+  // GITHUB_REPOSITORY ("owner/name") is the fallback when the explicit
+  // variables are empty; the literals keep local runs working.
+  const [slugOwner, slugRepo] = (process.env.GITHUB_REPOSITORY ?? "").split("/");
+  const owner = process.env.GITHUB_REPOSITORY_OWNER || slugOwner || "anthropics";
+  const repo = process.env.GITHUB_REPOSITORY_NAME || slugRepo || "claude-code";
+  console.log(`[DEBUG] Repository: ${owner}/${repo}`);
+
+  const threeDaysAgo = new Date();
+  threeDaysAgo.setDate(threeDaysAgo.getDate() - GRACE_PERIOD_DAYS);
+  console.log(
+    `[DEBUG] Checking for duplicate comments older than: ${threeDaysAgo.toISOString()}`
+  );
+
+  console.log(`[DEBUG] Fetching open issues created more than ${GRACE_PERIOD_DAYS} days ago...`);
+  const openItems = await githubRequestAllPages<GitHubIssue>(
+    `/repos/${owner}/${repo}/issues?state=open`,
+    token
+  );
+  if (openItems.length >= PER_PAGE * MAX_PAGES) {
+    console.log(
+      `[WARN] Reached the ${MAX_PAGES}-page limit; some open issues were not examined`
+    );
+  }
+
+  // Keep real issues (not pull requests) created before the cutoff.
+  const issues = openItems.filter(
+    (issue) =>
+      !issue.pull_request && new Date(issue.created_at) <= threeDaysAgo
+  );
+  console.log(`[DEBUG] Found ${issues.length} open issues`);
+
+  let processedCount = 0;
+  let candidateCount = 0;
+
+  for (const issue of issues) {
+    processedCount++;
+    console.log(
+      `[DEBUG] Processing issue #${issue.number} (${processedCount}/${issues.length}): ${issue.title}`
+    );
+
+    console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`);
+    const comments = await githubRequestAllPages<GitHubComment>(
+      `/repos/${owner}/${repo}/issues/${issue.number}/comments`,
+      token
+    );
+    console.log(
+      `[DEBUG] Issue #${issue.number} has ${comments.length} comments`
+    );
+
+    const dupeComments = comments.filter(isDuplicateDetectionComment);
+    console.log(
+      `[DEBUG] Issue #${issue.number} has ${dupeComments.length} duplicate detection comments`
+    );
+
+    if (dupeComments.length === 0) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - no duplicate comments found, skipping`
+      );
+      continue;
+    }
+
+    const lastDupeComment = dupeComments[dupeComments.length - 1];
+    const dupeCommentDate = new Date(lastDupeComment.created_at);
+    console.log(
+      `[DEBUG] Issue #${
+        issue.number
+      } - most recent duplicate comment from: ${dupeCommentDate.toISOString()}`
+    );
+
+    if (dupeCommentDate > threeDaysAgo) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - duplicate comment is too recent, skipping`
+      );
+      continue;
+    }
+    console.log(
+      `[DEBUG] Issue #${
+        issue.number
+      } - duplicate comment is old enough (${Math.floor(
+        (Date.now() - dupeCommentDate.getTime()) / (1000 * 60 * 60 * 24)
+      )} days)`
+    );
+
+    const commentsAfterDupe = comments.filter(
+      (comment) => new Date(comment.created_at) > dupeCommentDate
+    );
+    console.log(
+      `[DEBUG] Issue #${issue.number} - ${commentsAfterDupe.length} comments after duplicate detection`
+    );
+
+    if (commentsAfterDupe.length > 0) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - has activity after duplicate comment, skipping`
+      );
+      continue;
+    }
+
+    console.log(
+      `[DEBUG] Issue #${issue.number} - checking reactions on duplicate comment...`
+    );
+    const reactions = await githubRequestAllPages<GitHubReaction>(
+      `/repos/${owner}/${repo}/issues/comments/${lastDupeComment.id}/reactions`,
+      token
+    );
+    console.log(
+      `[DEBUG] Issue #${issue.number} - duplicate comment has ${reactions.length} reactions`
+    );
+
+    const authorId = issue.user?.id;
+    const authorThumbsDown = reactions.some(
+      (reaction) =>
+        authorId !== undefined &&
+        reaction.user?.id === authorId &&
+        reaction.content === "-1"
+    );
+    console.log(
+      `[DEBUG] Issue #${issue.number} - author thumbs down reaction: ${authorThumbsDown}`
+    );
+
+    if (authorThumbsDown) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - author disagreed with duplicate detection, skipping`
+      );
+      continue;
+    }
+
+    const duplicateIssueNumber = extractDuplicateIssueNumber(lastDupeComment.body ?? "");
+    if (!duplicateIssueNumber) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - could not extract duplicate issue number from comment, skipping`
+      );
+      continue;
+    }
+    if (duplicateIssueNumber === issue.number) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} - duplicate comment references the issue itself, skipping`
+      );
+      continue;
+    }
+
+    candidateCount++;
+    const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`;
+
+    try {
+      console.log(
+        `[INFO] Auto-closing issue #${issue.number} as duplicate of #${duplicateIssueNumber}: ${issueUrl}`
+      );
+      await closeIssueAsDuplicate(owner, repo, issue.number, duplicateIssueNumber, token);
+      console.log(
+        `[SUCCESS] Successfully closed issue #${issue.number} as duplicate of #${duplicateIssueNumber}`
+      );
+    } catch (error) {
+      console.error(
+        `[ERROR] Failed to close issue #${issue.number} as duplicate: ${error}`
+      );
+    }
+  }
+
+  console.log(
+    `[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates for auto-close`
+  );
+}
+
+// Log the failure and exit non-zero so the workflow run is marked as failed.
+autoCloseDuplicates().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
+
+// Make it a module
+export {};
diff --git a/scripts/backfill-duplicate-comments.ts b/scripts/backfill-duplicate-comments.ts
new file mode 100644
index 0000000..f79ab43
--- /dev/null
+++ b/scripts/backfill-duplicate-comments.ts
@@ -0,0 +1,317 @@
+#!/usr/bin/env bun
+
+/**
+ * Dispatches the claude-dedupe-issues workflow for issues in a number range
+ * that do not yet carry a duplicate-detection comment from the bot.
+ */
+
+// Ambient declaration of the only `process` members this script touches.
+declare global {
+  var process: {
+    env: Record<string, string | undefined>;
+    exitCode?: number;
+  };
+}
+
+interface GitHubIssue {
+  number: number;
+  title: string;
+  state: string;
+  state_reason?: string;
+  user: { id: number };
+  created_at: string;
+  closed_at?: string;
+  // The issues listing also returns pull requests; only those carry this key.
+  pull_request?: unknown;
+}
+
+interface GitHubComment {
+  id: number;
+  body?: string;
+  created_at: string;
+  user: { type: string; id: number } | null;
+}
+
+const PER_PAGE = 100;
+// Cap on issue-listing pages so pagination always terminates.
+const MAX_ISSUE_PAGES = 200;
+// Cap on comment pages fetched per issue.
+const MAX_COMMENT_PAGES = 20;
+// Pause after each real dispatch attempt, in milliseconds.
+const DISPATCH_DELAY_MS = 1000;
+
+/**
+ * Sends one authenticated request to the GitHub REST API.
+ *
+ * @param endpoint - Path beginning with "/", appended to https://api.github.com.
+ * @param token - Token sent as a Bearer credential.
+ * @param method - HTTP method; defaults to GET.
+ * @param body - Optional payload, JSON-encoded when present.
+ * @returns The parsed JSON response body, or null for a 204 response.
+ * @throws Error when the response status is not in the 2xx range.
+ */
+async function githubRequest(endpoint: string, token: string, method: string = 'GET', body?: any): Promise<any> {
+  const response = await fetch(`https://api.github.com${endpoint}`, {
+    method,
+    headers: {
+      Authorization: `Bearer ${token}`,
+      Accept: "application/vnd.github.v3+json",
+      "User-Agent": "backfill-duplicate-comments-script",
+      ...(body && { "Content-Type": "application/json" }),
+    },
+    ...(body && { body: JSON.stringify(body) }),
+  });
+
+  if (!response.ok) {
+    throw new Error(
+      `GitHub API request failed: ${method} ${endpoint} -> ${response.status} ${response.statusText}`
+    );
+  }
+
+  // The workflow dispatch endpoint answers 204 No Content; parsing the empty
+  // body would throw and report a successful dispatch as a failure.
+  if (response.status === 204) {
+    return null;
+  }
+
+  return response.json();
+}
+
+/**
+ * Fetches the comments on an issue page by page. GitHub returns 30 comments
+ * per page unless asked for more, so a single request can miss the bot's
+ * comment on a busy issue and cause a redundant dispatch.
+ *
+ * @returns Comments from up to MAX_COMMENT_PAGES pages, in API order.
+ */
+async function fetchIssueComments(
+  owner: string,
+  repo: string,
+  issueNumber: number,
+  token: string
+): Promise<GitHubComment[]> {
+  const comments: GitHubComment[] = [];
+
+  for (let page = 1; page <= MAX_COMMENT_PAGES; page++) {
+    const pageComments: GitHubComment[] = await githubRequest(
+      `/repos/${owner}/${repo}/issues/${issueNumber}/comments?per_page=${PER_PAGE}&page=${page}`,
+      token
+    );
+    comments.push(...pageComments);
+
+    // A short page is the last one.
+    if (pageComments.length < PER_PAGE) break;
+  }
+
+  return comments;
+}
+
+/**
+ * Reads an integer from an environment variable.
+ *
+ * @param name - Variable to read.
+ * @param fallback - Value used when the variable is unset or empty.
+ * @throws Error when the variable is set but does not start with a number.
+ */
+function readIntEnv(name: string, fallback: number): number {
+  const raw = process.env[name];
+  if (!raw) return fallback;
+
+  const value = parseInt(raw, 10);
+  if (Number.isNaN(value)) {
+    throw new Error(`${name} must be an integer, got "${raw}"`);
+  }
+  return value;
+}
+
+/**
+ * Dispatches the dedupe workflow for one issue, or only logs the intent in
+ * dry-run mode.
+ *
+ * @param issueNumber - Passed to the workflow as its issue_number input.
+ * @param dryRun - When true (the default) no request is sent.
+ * @throws Error when the dispatch request fails.
+ */
+async function triggerDedupeWorkflow(
+  owner: string,
+  repo: string,
+  issueNumber: number,
+  token: string,
+  dryRun: boolean = true
+): Promise<void> {
+  if (dryRun) {
+    console.log(`[DRY RUN] Would trigger dedupe workflow for issue #${issueNumber}`);
+    return;
+  }
+
+  await githubRequest(
+    `/repos/${owner}/${repo}/actions/workflows/claude-dedupe-issues.yml/dispatches`,
+    token,
+    'POST',
+    {
+      ref: 'main',
+      inputs: {
+        issue_number: issueNumber.toString()
+      }
+    }
+  );
+}
+
+/**
+ * Entry point: lists issues in [MIN_ISSUE_NUMBER, MAX_ISSUE_NUMBER) and
+ * dispatches the dedupe workflow for each one lacking a bot comment.
+ *
+ * @throws Error when GITHUB_TOKEN is unset or a listing request fails.
+ */
+async function backfillDuplicateComments(): Promise<void> {
+  console.log("[DEBUG] Starting backfill duplicate comments script");
+
+  const token = process.env.GITHUB_TOKEN;
+  if (!token) {
+    throw new Error(`GITHUB_TOKEN environment variable is required
+
+Usage:
+  GITHUB_TOKEN=your_token bun run scripts/backfill-duplicate-comments.ts
+
+Environment Variables:
+  GITHUB_TOKEN - GitHub personal access token with repo and actions permissions (required)
+  DRY_RUN - Set to "false" to actually trigger workflows (default: true for safety)
+  MAX_ISSUE_NUMBER - Only process issues with numbers less than this value (default: 4050)
+  MIN_ISSUE_NUMBER - Only process issues with numbers at or above this value (default: 1)`);
+  }
+  console.log("[DEBUG] GitHub token found");
+
+  // GITHUB_REPOSITORY ("owner/name") is used when set; the literals keep the
+  // previous target for runs where it is absent.
+  const [slugOwner, slugRepo] = (process.env.GITHUB_REPOSITORY ?? "").split("/");
+  const owner = slugOwner || "anthropics";
+  const repo = slugRepo || "claude-code";
+  const dryRun = process.env.DRY_RUN !== "false";
+  const maxIssueNumber = readIntEnv("MAX_ISSUE_NUMBER", 4050);
+  const minIssueNumber = readIntEnv("MIN_ISSUE_NUMBER", 1);
+
+  console.log(`[DEBUG] Repository: ${owner}/${repo}`);
+  console.log(`[DEBUG] Dry run mode: ${dryRun}`);
+  console.log(`[DEBUG] Looking at issues between #${minIssueNumber} and #${maxIssueNumber}`);
+
+  console.log(`[DEBUG] Fetching issues between #${minIssueNumber} and #${maxIssueNumber}...`);
+  const allIssues: GitHubIssue[] = [];
+  let page = 1;
+
+  while (true) {
+    const pageIssues: GitHubIssue[] = await githubRequest(
+      `/repos/${owner}/${repo}/issues?state=all&per_page=${PER_PAGE}&page=${page}&sort=created&direction=desc`,
+      token
+    );
+
+    if (pageIssues.length === 0) break;
+
+    // Filter to issues (not pull requests) within the specified range
+    const filteredIssues = pageIssues.filter(issue =>
+      !issue.pull_request &&
+      issue.number >= minIssueNumber && issue.number < maxIssueNumber
+    );
+    allIssues.push(...filteredIssues);
+
+    // Pages arrive newest first, so once the last issue of a page is below the
+    // minimum no later page can contain an issue in range.
+    const oldestIssueInPage = pageIssues[pageIssues.length - 1];
+    if (oldestIssueInPage && oldestIssueInPage.number >= maxIssueNumber) {
+      console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, continuing...`);
+    } else if (oldestIssueInPage && oldestIssueInPage.number < minIssueNumber) {
+      console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, below minimum, stopping`);
+      break;
+    } else if (filteredIssues.length === 0 && pageIssues.length > 0) {
+      console.log(`[DEBUG] No issues in page #${page} are in range #${minIssueNumber}-#${maxIssueNumber}, continuing...`);
+    }
+
+    page++;
+
+    // Safety limit to avoid infinite loops
+    if (page > MAX_ISSUE_PAGES) {
+      console.log("[DEBUG] Reached page limit, stopping pagination");
+      break;
+    }
+  }
+
+  console.log(`[DEBUG] Found ${allIssues.length} issues between #${minIssueNumber} and #${maxIssueNumber}`);
+
+  let processedCount = 0;
+  let candidateCount = 0;
+  let triggeredCount = 0;
+
+  for (const issue of allIssues) {
+    processedCount++;
+    console.log(
+      `[DEBUG] Processing issue #${issue.number} (${processedCount}/${allIssues.length}): ${issue.title}`
+    );
+
+    console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`);
+    const comments = await fetchIssueComments(owner, repo, issue.number, token);
+    console.log(
+      `[DEBUG] Issue #${issue.number} has ${comments.length} comments`
+    );
+
+    // Look for existing duplicate detection comments (from the dedupe bot)
+    const dupeDetectionComments = comments.filter((comment) => {
+      const body = comment.body ?? "";
+      return (
+        body.includes("Found") &&
+        body.includes("possible duplicate") &&
+        comment.user?.type === "Bot"
+      );
+    });
+
+    console.log(
+      `[DEBUG] Issue #${issue.number} has ${dupeDetectionComments.length} duplicate detection comments`
+    );
+
+    // Skip if there's already a duplicate detection comment
+    if (dupeDetectionComments.length > 0) {
+      console.log(
+        `[DEBUG] Issue #${issue.number} already has duplicate detection comment, skipping`
+      );
+      continue;
+    }
+
+    candidateCount++;
+    const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`;
+
+    try {
+      console.log(
+        `[INFO] ${dryRun ? '[DRY RUN] ' : ''}Triggering dedupe workflow for issue #${issue.number}: ${issueUrl}`
+      );
+      await triggerDedupeWorkflow(owner, repo, issue.number, token, dryRun);
+
+      if (!dryRun) {
+        console.log(
+          `[SUCCESS] Successfully triggered dedupe workflow for issue #${issue.number}`
+        );
+      }
+      triggeredCount++;
+    } catch (error) {
+      console.error(
+        `[ERROR] Failed to trigger workflow for issue #${issue.number}: ${error}`
+      );
+    }
+
+    // Add a delay between workflow triggers to avoid overwhelming the system;
+    // a dry run dispatches nothing, so it does not wait.
+    if (!dryRun) {
+      await new Promise(resolve => setTimeout(resolve, DISPATCH_DELAY_MS));
+    }
+  }
+
+  console.log(
+    `[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates without duplicate comments, ${dryRun ? 'would trigger' : 'triggered'} ${triggeredCount} workflows`
+  );
+}
+
+// Log the failure and exit non-zero so the workflow run is marked as failed.
+backfillDuplicateComments().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
+
+// Make it a module
+export {};
\ No newline at end of file