added shields checker script

This commit is contained in:
James Briggs
2023-08-02 13:02:13 +08:00
parent bcfd133352
commit 358d879166
2 changed files with 117 additions and 0 deletions

21
scripts/README.md Normal file
View File

@@ -0,0 +1,21 @@
# Examples Admin Scripts
This directory contains helper scripts for admin of the /examples repo.
## Shields Checker
The beginning of every notebook in the examples repo should include a Colab shield and an nbviewer shield to allow easy navigation to either service. To confirm that these links are valid, and to update any that have changed, we can use the **Shields Checker** script.
To use the script, navigate to the root directory of the examples repo and run the following in a terminal window:
```
python scripts/shields-checker.py run --path . --shield-error False --update False
```
This runs the shields checker script across all notebooks in the directory. It will not update shield links, and it will not raise an error if no shields are found in a notebook.
We can adjust the default parameters depending on our intended use:
* `--path` allows us to specify a specific directory like `learn` or `docs`. Default value is `.` (search all directories).
* `--shield-error` controls what happens when no shields are found in a notebook. When set to `True`, a `ValueError` is raised. Default value is `False`, which logs a warning to the console but does not raise an error.
* `--update` specifies whether shield links should be automatically updated. When set to `True` both Colab and nbviewer links will be updated *if* they are found to be invalid. Default value is `False` which only logs whether links are valid or not.

View File

@@ -0,0 +1,96 @@
import logging
import re
from pathlib import Path
from typing import Tuple

import click
# Each pattern matches only the target URL of a markdown shield of the form
# [![<alt text>](<badge image>)](<target URL>): the badge prefix is a
# lookbehind and the closing parenthesis is a lookahead, so group(0) is the
# bare link.
colab_link = re.compile(
    r"(?<=\[!\[Open In Colab\]"  # alt text
    r"\(https:\/\/colab\.research\.google\.com\/assets\/colab-badge\.svg\)]\()"  # badge image
    r"[\w:\/.-]+"  # the link itself
    r"(?=\))"  # closing parenthesis of the markdown link
)
nbviewer_link = re.compile(
    r"(?<=\[!\[Open nbviewer\]"  # alt text
    r"\(https:\/\/raw\.githubusercontent\.com\/pinecone-io\/examples"  # badge image...
    r"\/master\/assets\/nbviewer-shield\.svg\)]\()"  # ...continued
    r"[\w:\/.-]+"  # the link itself
    r"(?=\))"  # closing parenthesis of the markdown link
)
def link_valid(current_url: str, path: str, version: str) -> Tuple[bool, str]:
    """Compare a shield link with the link expected for a notebook.

    Args:
        current_url: Link currently found in the notebook's shield.
        path: Notebook path; it is appended to the repository's
            ``blob/master`` URL, so it should be relative to the repo root.
        version: Service the shield points to, "colab" or "nbviewer".

    Returns:
        A ``(valid, url)`` tuple. ``valid`` is True when ``current_url``
        equals the expected link, and ``url`` is that expected link.

    Raises:
        ValueError: If ``version`` is neither "colab" nor "nbviewer".
    """
    base_urls = {
        "colab": "https://colab.research.google.com/github/pinecone-io/examples/blob/master",
        "nbviewer": "https://nbviewer.org/github/pinecone-io/examples/blob/master",
    }
    if version not in base_urls:
        raise ValueError("version must be one of colab or nbviewer")
    # URLs always use forward slashes; as_posix() converts the platform's
    # separators and drops a leading "./" so the expected link is canonical.
    url = f"{base_urls[version]}/{Path(path).as_posix()}"
    return current_url == url, url
def link_update(current_url: str, path: str, version: str) -> dict:
    """Rewrite a notebook's shield link in place when it is out of date.

    Args:
        current_url: Link currently found in the notebook's shield.
        path: Path of the notebook file to read and, if needed, rewrite.
        version: Service the shield points to, "colab" or "nbviewer".

    Returns:
        A dict with keys ``"updated"`` (True when the file was rewritten),
        ``"past_url"`` (the link that was found) and ``"new_url"`` (the
        expected link).

    Raises:
        ValueError: Propagated from ``link_valid`` for an unknown ``version``.
    """
    valid, url = link_valid(current_url, path, version)
    if not valid:
        # Read and write as UTF-8 rather than the platform default encoding.
        # newline="" turns off newline translation in both directions, so the
        # link is the only text that changes in the file.
        with open(path, "r", encoding="utf-8", newline="") as f:
            content = f.read()
        with open(path, "w", encoding="utf-8", newline="") as f:
            f.write(content.replace(current_url, url))
    return {
        "updated": not valid,
        "past_url": current_url,
        "new_url": url,
    }
def handle_no_shield(path: str, version: str, shield_error: bool) -> None:
    """Report a notebook that has no shield of the given kind.

    Args:
        path: Path of the notebook that was checked.
        version: Name of the missing shield, used in the message.
        shield_error: When true, raise instead of logging a warning.

    Raises:
        ValueError: If ``shield_error`` is true.
    """
    message = f"No {version} shield found in {path}"
    if not shield_error:
        logging.warning(message)
        return
    raise ValueError(message)
@click.group(help="Shields CLI")
def cli():
    """Root command group; subcommands are attached with ``cli.add_command``."""
@click.command(help="Check if shields are up to date.")
@click.option("--update", default=False, help="Automatically update shield links.")
@click.option("--path", default=".", help="Path to check for shields.")
@click.option("--shield-error", default=False, help="Raise error if no shield is found.")
def run(update, path, shield_error):
    """Check, and optionally fix, the shields of every notebook under ``path``.

    Each ``*.ipynb`` file found recursively under ``path`` is searched for a
    Colab shield and an nbviewer shield. One verdict is logged per notebook
    and covers every shield that was found: "Updated" if any link was
    rewritten, "Failed" if any link is wrong and ``update`` is off, otherwise
    "Passed".

    Args:
        update: When true, rewrite incorrect links in the notebook file.
        path: Directory to search recursively for notebooks.
        shield_error: When true, a missing shield raises ``ValueError``
            instead of logging a warning.

    Raises:
        ValueError: If a shield is missing and ``shield_error`` is true.
    """
    logging.basicConfig(level=logging.INFO)
    # get all notebook paths
    notebooks = [str(x) for x in Path(path).glob("**/*.ipynb")]
    logging.info(f"Found {len(notebooks)} notebooks")
    shields = (("colab", colab_link), ("nbviewer", nbviewer_link))
    # check each notebook for shields
    for notebook in notebooks:
        with open(notebook, "r", encoding="utf-8") as f:
            content = f.read()
        found = updated = failed = False
        for version, pattern in shields:
            match = pattern.search(content)
            if match is None:
                handle_no_shield(notebook, version, shield_error)
                continue
            found = True
            current_url = match.group(0)
            if update:
                info = link_update(current_url, notebook, version)
                updated = updated or info["updated"]
            else:
                # link_valid returns (valid, expected_url); unpack it, because
                # the tuple itself is always truthy.
                valid, _expected_url = link_valid(current_url, notebook, version)
                failed = failed or not valid
        if not found:
            # Missing shields were already reported by handle_no_shield.
            continue
        if updated:
            logging.info(f"Updated: {notebook}")
        elif failed:
            logging.warning(f"Failed: {notebook}")
        else:
            logging.info(f"Passed: {notebook}")
# Attach the `run` subcommand to the CLI group.
cli.add_command(run)

# Invoke the CLI only when this file is executed as a script.
if __name__ == "__main__":
    cli()