ci(links): add link checking with lychee

- Configure lychee for notebook link validation
- Set up GitHub workflow for PR and scheduled checks
- Exclude API endpoints and localhost from checks
- Add PR comment integration for broken links
This commit is contained in:
Alex Notov
2025-09-05 19:00:20 -06:00
parent 3b522f033b
commit e0d22d077e
2 changed files with 100 additions and 0 deletions

65
.github/workflows/links.yml vendored Normal file
View File

@@ -0,0 +1,65 @@
# Link Check: validates hyperlinks in the repository's Markdown files and in
# Jupyter notebooks (converted to Markdown first) using lychee.
name: Link Check

on:
  pull_request:
    types: [opened, synchronize]
  schedule:
    # Weekly run: 00:00 every Sunday.
    - cron: "0 0 * * SUN"
  workflow_dispatch:

permissions:
  contents: read
  # Needed so the sticky-comment step can post the report on the PR.
  pull-requests: write

jobs:
  check-links:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # lychee is pointed at Markdown only, so notebooks are rendered to
      # Markdown in temp_md/ to expose the links they contain.
      - name: Convert notebooks for link extraction
        run: |
          pip install jupyter nbconvert
          mkdir -p temp_md
          # globstar: without it bash treats `**` like `*` and only notebooks
          # exactly one directory below skills/ are found.
          # nullglob: with no notebooks the loop is skipped instead of passing
          # the literal pattern to nbconvert and failing the step.
          shopt -s globstar nullglob
          for nb in skills/**/*.ipynb; do
            echo "Converting: $nb"
            jupyter nbconvert --to markdown "$nb" \
              --output-dir=temp_md \
              --ExtractOutputPreprocessor.enabled=False
          done

      - name: Check Links with Lychee
        id: lychee
        uses: lycheeverse/lychee-action@v2
        with:
          # Glob patterns are quoted so that lychee expands them itself
          # (recursively) rather than the shell the action runs in.
          args: >-
            --config lychee.toml
            --no-progress
            'skills/**/*.md'
            'temp_md/*.md'
            README.md
          # Report format and location go through the action's own inputs;
          # the action already passes --output to lychee, so repeating the
          # flag inside `args` would supply it twice.
          format: markdown
          output: lychee-report.md
          # Broken links are reported but do not fail the job.
          fail: false

      # Only comment on pull requests, and only when lychee reported a
      # non-zero exit code.
      - name: Comment PR with results
        if: github.event_name == 'pull_request' && steps.lychee.outputs.exit_code != 0
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: link-check
          path: lychee-report.md

      - name: Upload link check results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: link-check-results
          path: |
            lychee-report.md
            .lycheecache
          # .lycheecache is a dotfile; upload-artifact v4 skips hidden files
          # unless this is enabled.
          include-hidden-files: true

35
lychee.toml Normal file
View File

@@ -0,0 +1,35 @@
# Lychee configuration for Anthropic Cookbook

# Cache check results on disk (.lycheecache); entries expire after one day.
cache = true
max_cache_age = "1d"

# Request behaviour: per-request timeout and retry back-off are in seconds.
timeout = 30
max_redirects = 10
max_retries = 3
retry_wait_time = 2

# Also verify #fragment anchors, and treat missing input files as errors.
include_fragments = true
skip_missing = false

# Jupyter-specific settings
glob_ignore_case = true
fallback_extensions = ["ipynb", "md", "html", "py"]

# Paths that are never scanned for links.
# NOTE(review): recent lychee releases treat these entries as regular
# expressions, so the former shell glob "*.pyc" (not a valid regex) is
# written as an equivalent pattern - confirm against the pinned lychee version.
exclude_path = [
    ".git/",
    "__pycache__/",
    ".ipynb_checkpoints/",
    '\.pyc$',
]

# URLs that are never checked (regular expressions). Literal strings are used
# so the dots can be escaped; patterns are anchored at the start of the URL.
# The localhost patterns cover both http and https.
exclude = [
    '^https://api\.anthropic\.com',
    '^https://console\.anthropic\.com',
    '^https?://localhost',
    '^https?://127\.0\.0\.1',
]

# Accept these status codes
# NOTE(review): 403 and 429 are presumably accepted to tolerate bot blocking
# and rate limiting - confirm this is intended, as it can hide dead links.
accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308, 403, 429]

# User-Agent sent with every request. This uses lychee's dedicated option
# rather than a custom header entry; it is not related to authentication.
user_agent = "Mozilla/5.0 (compatible; lychee/0.20.1; anthropic-cookbook)"