Restore dedupe workflows

This reverts commit d81efef324.
This commit is contained in:
Adam Wolff
2025-09-24 18:01:05 -07:00
parent f5f6c7693c
commit 09a020096a
5 changed files with 645 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
name: Auto-close duplicate issues
description: Auto-closes issues that are duplicates of existing issues
on:
schedule:
- cron: "0 9 * * *"
workflow_dispatch:
jobs:
auto-close-duplicates:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
issues: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Auto-close duplicate issues
run: bun run scripts/auto-close-duplicates.ts
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}

View File

@@ -0,0 +1,44 @@
name: Backfill Duplicate Comments
description: Triggers duplicate detection for old issues that don't have duplicate comments
on:
workflow_dispatch:
inputs:
days_back:
description: 'How many days back to look for old issues'
required: false
default: '90'
type: string
dry_run:
description: 'Dry run mode (true to only log what would be done)'
required: false
default: 'true'
type: choice
options:
- 'true'
- 'false'
jobs:
backfill-duplicate-comments:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
issues: read
actions: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Backfill duplicate comments
run: bun run scripts/backfill-duplicate-comments.ts
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DAYS_BACK: ${{ inputs.days_back }}
DRY_RUN: ${{ inputs.dry_run }}

View File

@@ -0,0 +1,80 @@
name: Claude Issue Dedupe
description: Automatically dedupe GitHub issues using Claude Code
on:
issues:
types: [opened]
workflow_dispatch:
inputs:
issue_number:
description: 'Issue number to process for duplicate detection'
required: true
type: string
jobs:
claude-dedupe-issues:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
issues: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Run Claude Code slash command
uses: anthropics/claude-code-base-action@beta
with:
prompt: "/dedupe ${{ github.repository }}/issues/${{ github.event.issue.number || inputs.issue_number }}"
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
claude_env: |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Log duplicate comment event to Statsig
if: always()
env:
STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
run: |
ISSUE_NUMBER=${{ github.event.issue.number || inputs.issue_number }}
REPO=${{ github.repository }}
if [ -z "$STATSIG_API_KEY" ]; then
echo "STATSIG_API_KEY not found, skipping Statsig logging"
exit 0
fi
# Prepare the event payload
EVENT_PAYLOAD=$(jq -n \
--arg issue_number "$ISSUE_NUMBER" \
--arg repo "$REPO" \
--arg triggered_by "${{ github.event_name }}" \
'{
events: [{
eventName: "github_duplicate_comment_added",
value: 1,
metadata: {
repository: $repo,
issue_number: ($issue_number | tonumber),
triggered_by: $triggered_by,
workflow_run_id: "${{ github.run_id }}"
},
time: (now | floor | tostring)
}]
}')
# Send to Statsig API
echo "Logging duplicate comment event to Statsig for issue #${ISSUE_NUMBER}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST https://events.statsigapi.net/v1/log_event \
-H "Content-Type: application/json" \
-H "STATSIG-API-KEY: ${STATSIG_API_KEY}" \
-d "$EVENT_PAYLOAD")
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" -eq 200 ] || [ "$HTTP_CODE" -eq 202 ]; then
echo "Successfully logged duplicate comment event for issue #${ISSUE_NUMBER}"
else
echo "Failed to log duplicate comment event for issue #${ISSUE_NUMBER}. HTTP ${HTTP_CODE}: ${BODY}"
fi

View File

@@ -0,0 +1,277 @@
#!/usr/bin/env bun
declare global {
var process: {
env: Record<string, string | undefined>;
};
}
interface GitHubIssue {
number: number;
title: string;
user: { id: number };
created_at: string;
}
interface GitHubComment {
id: number;
body: string;
created_at: string;
user: { type: string; id: number };
}
interface GitHubReaction {
user: { id: number };
content: string;
}
async function githubRequest<T>(endpoint: string, token: string, method: string = 'GET', body?: any): Promise<T> {
const response = await fetch(`https://api.github.com${endpoint}`, {
method,
headers: {
Authorization: `Bearer ${token}`,
Accept: "application/vnd.github.v3+json",
"User-Agent": "auto-close-duplicates-script",
...(body && { "Content-Type": "application/json" }),
},
...(body && { body: JSON.stringify(body) }),
});
if (!response.ok) {
throw new Error(
`GitHub API request failed: ${response.status} ${response.statusText}`
);
}
return response.json();
}
function extractDuplicateIssueNumber(commentBody: string): number | null {
// Try to match #123 format first
let match = commentBody.match(/#(\d+)/);
if (match) {
return parseInt(match[1], 10);
}
// Try to match GitHub issue URL format: https://github.com/owner/repo/issues/123
match = commentBody.match(/github\.com\/[^\/]+\/[^\/]+\/issues\/(\d+)/);
if (match) {
return parseInt(match[1], 10);
}
return null;
}
async function closeIssueAsDuplicate(
owner: string,
repo: string,
issueNumber: number,
duplicateOfNumber: number,
token: string
): Promise<void> {
await githubRequest(
`/repos/${owner}/${repo}/issues/${issueNumber}`,
token,
'PATCH',
{
state: 'closed',
state_reason: 'duplicate',
labels: ['duplicate']
}
);
await githubRequest(
`/repos/${owner}/${repo}/issues/${issueNumber}/comments`,
token,
'POST',
{
body: `This issue has been automatically closed as a duplicate of #${duplicateOfNumber}.
If this is incorrect, please re-open this issue or create a new one.
🤖 Generated with [Claude Code](https://claude.ai/code)`
}
);
}
async function autoCloseDuplicates(): Promise<void> {
console.log("[DEBUG] Starting auto-close duplicates script");
const token = process.env.GITHUB_TOKEN;
if (!token) {
throw new Error("GITHUB_TOKEN environment variable is required");
}
console.log("[DEBUG] GitHub token found");
const owner = process.env.GITHUB_REPOSITORY_OWNER || "anthropics";
const repo = process.env.GITHUB_REPOSITORY_NAME || "claude-code";
console.log(`[DEBUG] Repository: ${owner}/${repo}`);
const threeDaysAgo = new Date();
threeDaysAgo.setDate(threeDaysAgo.getDate() - 3);
console.log(
`[DEBUG] Checking for duplicate comments older than: ${threeDaysAgo.toISOString()}`
);
console.log("[DEBUG] Fetching open issues created more than 3 days ago...");
const allIssues: GitHubIssue[] = [];
let page = 1;
const perPage = 100;
while (true) {
const pageIssues: GitHubIssue[] = await githubRequest(
`/repos/${owner}/${repo}/issues?state=open&per_page=${perPage}&page=${page}`,
token
);
if (pageIssues.length === 0) break;
// Filter for issues created more than 3 days ago
const oldEnoughIssues = pageIssues.filter(issue =>
new Date(issue.created_at) <= threeDaysAgo
);
allIssues.push(...oldEnoughIssues);
page++;
// Safety limit to avoid infinite loops
if (page > 20) break;
}
const issues = allIssues;
console.log(`[DEBUG] Found ${issues.length} open issues`);
let processedCount = 0;
let candidateCount = 0;
for (const issue of issues) {
processedCount++;
console.log(
`[DEBUG] Processing issue #${issue.number} (${processedCount}/${issues.length}): ${issue.title}`
);
console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`);
const comments: GitHubComment[] = await githubRequest(
`/repos/${owner}/${repo}/issues/${issue.number}/comments`,
token
);
console.log(
`[DEBUG] Issue #${issue.number} has ${comments.length} comments`
);
const dupeComments = comments.filter(
(comment) =>
comment.body.includes("Found") &&
comment.body.includes("possible duplicate") &&
comment.user.type === "Bot"
);
console.log(
`[DEBUG] Issue #${issue.number} has ${dupeComments.length} duplicate detection comments`
);
if (dupeComments.length === 0) {
console.log(
`[DEBUG] Issue #${issue.number} - no duplicate comments found, skipping`
);
continue;
}
const lastDupeComment = dupeComments[dupeComments.length - 1];
const dupeCommentDate = new Date(lastDupeComment.created_at);
console.log(
`[DEBUG] Issue #${
issue.number
} - most recent duplicate comment from: ${dupeCommentDate.toISOString()}`
);
if (dupeCommentDate > threeDaysAgo) {
console.log(
`[DEBUG] Issue #${issue.number} - duplicate comment is too recent, skipping`
);
continue;
}
console.log(
`[DEBUG] Issue #${
issue.number
} - duplicate comment is old enough (${Math.floor(
(Date.now() - dupeCommentDate.getTime()) / (1000 * 60 * 60 * 24)
)} days)`
);
const commentsAfterDupe = comments.filter(
(comment) => new Date(comment.created_at) > dupeCommentDate
);
console.log(
`[DEBUG] Issue #${issue.number} - ${commentsAfterDupe.length} comments after duplicate detection`
);
if (commentsAfterDupe.length > 0) {
console.log(
`[DEBUG] Issue #${issue.number} - has activity after duplicate comment, skipping`
);
continue;
}
console.log(
`[DEBUG] Issue #${issue.number} - checking reactions on duplicate comment...`
);
const reactions: GitHubReaction[] = await githubRequest(
`/repos/${owner}/${repo}/issues/comments/${lastDupeComment.id}/reactions`,
token
);
console.log(
`[DEBUG] Issue #${issue.number} - duplicate comment has ${reactions.length} reactions`
);
const authorThumbsDown = reactions.some(
(reaction) =>
reaction.user.id === issue.user.id && reaction.content === "-1"
);
console.log(
`[DEBUG] Issue #${issue.number} - author thumbs down reaction: ${authorThumbsDown}`
);
if (authorThumbsDown) {
console.log(
`[DEBUG] Issue #${issue.number} - author disagreed with duplicate detection, skipping`
);
continue;
}
const duplicateIssueNumber = extractDuplicateIssueNumber(lastDupeComment.body);
if (!duplicateIssueNumber) {
console.log(
`[DEBUG] Issue #${issue.number} - could not extract duplicate issue number from comment, skipping`
);
continue;
}
candidateCount++;
const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`;
try {
console.log(
`[INFO] Auto-closing issue #${issue.number} as duplicate of #${duplicateIssueNumber}: ${issueUrl}`
);
await closeIssueAsDuplicate(owner, repo, issue.number, duplicateIssueNumber, token);
console.log(
`[SUCCESS] Successfully closed issue #${issue.number} as duplicate of #${duplicateIssueNumber}`
);
} catch (error) {
console.error(
`[ERROR] Failed to close issue #${issue.number} as duplicate: ${error}`
);
}
}
console.log(
`[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates for auto-close`
);
}
autoCloseDuplicates().catch(console.error);
// Make it a module
export {};

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env bun
declare global {
var process: {
env: Record<string, string | undefined>;
};
}
interface GitHubIssue {
number: number;
title: string;
state: string;
state_reason?: string;
user: { id: number };
created_at: string;
closed_at?: string;
}
interface GitHubComment {
id: number;
body: string;
created_at: string;
user: { type: string; id: number };
}
async function githubRequest<T>(endpoint: string, token: string, method: string = 'GET', body?: any): Promise<T> {
const response = await fetch(`https://api.github.com${endpoint}`, {
method,
headers: {
Authorization: `Bearer ${token}`,
Accept: "application/vnd.github.v3+json",
"User-Agent": "backfill-duplicate-comments-script",
...(body && { "Content-Type": "application/json" }),
},
...(body && { body: JSON.stringify(body) }),
});
if (!response.ok) {
throw new Error(
`GitHub API request failed: ${response.status} ${response.statusText}`
);
}
return response.json();
}
async function triggerDedupeWorkflow(
owner: string,
repo: string,
issueNumber: number,
token: string,
dryRun: boolean = true
): Promise<void> {
if (dryRun) {
console.log(`[DRY RUN] Would trigger dedupe workflow for issue #${issueNumber}`);
return;
}
await githubRequest(
`/repos/${owner}/${repo}/actions/workflows/claude-dedupe-issues.yml/dispatches`,
token,
'POST',
{
ref: 'main',
inputs: {
issue_number: issueNumber.toString()
}
}
);
}
async function backfillDuplicateComments(): Promise<void> {
console.log("[DEBUG] Starting backfill duplicate comments script");
const token = process.env.GITHUB_TOKEN;
if (!token) {
throw new Error(`GITHUB_TOKEN environment variable is required
Usage:
GITHUB_TOKEN=your_token bun run scripts/backfill-duplicate-comments.ts
Environment Variables:
GITHUB_TOKEN - GitHub personal access token with repo and actions permissions (required)
DRY_RUN - Set to "false" to actually trigger workflows (default: true for safety)
MAX_ISSUE_NUMBER - Only process issues with numbers less than this value (default: 4050)`);
}
console.log("[DEBUG] GitHub token found");
const owner = "anthropics";
const repo = "claude-code";
const dryRun = process.env.DRY_RUN !== "false";
const maxIssueNumber = parseInt(process.env.MAX_ISSUE_NUMBER || "4050", 10);
const minIssueNumber = parseInt(process.env.MIN_ISSUE_NUMBER || "1", 10);
console.log(`[DEBUG] Repository: ${owner}/${repo}`);
console.log(`[DEBUG] Dry run mode: ${dryRun}`);
console.log(`[DEBUG] Looking at issues between #${minIssueNumber} and #${maxIssueNumber}`);
console.log(`[DEBUG] Fetching issues between #${minIssueNumber} and #${maxIssueNumber}...`);
const allIssues: GitHubIssue[] = [];
let page = 1;
const perPage = 100;
while (true) {
const pageIssues: GitHubIssue[] = await githubRequest(
`/repos/${owner}/${repo}/issues?state=all&per_page=${perPage}&page=${page}&sort=created&direction=desc`,
token
);
if (pageIssues.length === 0) break;
// Filter to only include issues within the specified range
const filteredIssues = pageIssues.filter(issue =>
issue.number >= minIssueNumber && issue.number < maxIssueNumber
);
allIssues.push(...filteredIssues);
// If the oldest issue in this page is still above our minimum, we need to continue
// but if the oldest issue is below our minimum, we can stop
const oldestIssueInPage = pageIssues[pageIssues.length - 1];
if (oldestIssueInPage && oldestIssueInPage.number >= maxIssueNumber) {
console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, continuing...`);
} else if (oldestIssueInPage && oldestIssueInPage.number < minIssueNumber) {
console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, below minimum, stopping`);
break;
} else if (filteredIssues.length === 0 && pageIssues.length > 0) {
console.log(`[DEBUG] No issues in page #${page} are in range #${minIssueNumber}-#${maxIssueNumber}, continuing...`);
}
page++;
// Safety limit to avoid infinite loops
if (page > 200) {
console.log("[DEBUG] Reached page limit, stopping pagination");
break;
}
}
console.log(`[DEBUG] Found ${allIssues.length} issues between #${minIssueNumber} and #${maxIssueNumber}`);
let processedCount = 0;
let candidateCount = 0;
let triggeredCount = 0;
for (const issue of allIssues) {
processedCount++;
console.log(
`[DEBUG] Processing issue #${issue.number} (${processedCount}/${allIssues.length}): ${issue.title}`
);
console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`);
const comments: GitHubComment[] = await githubRequest(
`/repos/${owner}/${repo}/issues/${issue.number}/comments`,
token
);
console.log(
`[DEBUG] Issue #${issue.number} has ${comments.length} comments`
);
// Look for existing duplicate detection comments (from the dedupe bot)
const dupeDetectionComments = comments.filter(
(comment) =>
comment.body.includes("Found") &&
comment.body.includes("possible duplicate") &&
comment.user.type === "Bot"
);
console.log(
`[DEBUG] Issue #${issue.number} has ${dupeDetectionComments.length} duplicate detection comments`
);
// Skip if there's already a duplicate detection comment
if (dupeDetectionComments.length > 0) {
console.log(
`[DEBUG] Issue #${issue.number} already has duplicate detection comment, skipping`
);
continue;
}
candidateCount++;
const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`;
try {
console.log(
`[INFO] ${dryRun ? '[DRY RUN] ' : ''}Triggering dedupe workflow for issue #${issue.number}: ${issueUrl}`
);
await triggerDedupeWorkflow(owner, repo, issue.number, token, dryRun);
if (!dryRun) {
console.log(
`[SUCCESS] Successfully triggered dedupe workflow for issue #${issue.number}`
);
}
triggeredCount++;
} catch (error) {
console.error(
`[ERROR] Failed to trigger workflow for issue #${issue.number}: ${error}`
);
}
// Add a delay between workflow triggers to avoid overwhelming the system
await new Promise(resolve => setTimeout(resolve, 1000));
}
console.log(
`[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates without duplicate comments, ${dryRun ? 'would trigger' : 'triggered'} ${triggeredCount} workflows`
);
}
backfillDuplicateComments().catch(console.error);
// Make it a module
export {};