mirror of
https://github.com/jupyter-naas/awesome-notebooks.git
synced 2024-05-28 02:00:17 +03:00
331 lines
8.6 KiB
Plaintext
331 lines
8.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "177a878f-37b4-4931-a05e-1781d7d11c29",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2021-02-23T14:22:16.610471Z",
|
|
"iopub.status.busy": "2021-02-23T14:22:16.610129Z",
|
|
"iopub.status.idle": "2021-02-23T14:22:16.627784Z",
|
|
"shell.execute_reply": "2021-02-23T14:22:16.626866Z",
|
|
"shell.execute_reply.started": "2021-02-23T14:22:16.610384Z"
|
|
},
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"<img width=\"8%\" alt=\"GitHub.png\" src=\"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/GitHub.png\" style=\"border-radius: 15%\">"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e74d4d6a-9b35-4e41-8617-5124decf3bc9",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"# GitHub - Get most starred repos\n",
|
|
"<a href=\"https://bit.ly/3JyWIk6\">Give Feedback</a> | <a href=\"https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=GitHub+-+Get+most+starred+repos:+Error+short+description\">Bug report</a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e56f09a7-a3fe-4c4a-9bf0-7f7b280e030f",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"**Tags:** #github #repos #stars #snippet"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "56a78043-5105-4b22-8e6b-5c853777e1b4",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"**Author:** [Sanjeet Attili](https://www.linkedin.com/in/sanjeet-attili-760bab190)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "6142adaa-cdeb-4f01-a57a-6a16084ec466",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"**Last update:** 2023-04-12 (Created: 2022-06-06)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "naas-description",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": [
|
|
"description"
|
|
]
|
|
},
|
|
"source": [
|
|
"**Description:** This notebook provides a list of the most popular GitHub repositories based on the number of stars they have received."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5225edb4-ead5-4c32-9386-c12e3d38da28",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"## Input"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3299f3b7-330f-4145-8d8e-f9163e8c7c64",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"### Import libraries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8de808ad-6416-4cc4-9085-d373246c745d",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import requests\n",
|
|
"import pandas as pd\n",
|
|
"import plotly.express as px\n",
|
|
"import naas"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bf7b7a18-a011-4468-b6cc-7c3127ee417e",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"### Setup Variables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "32b38b7b-4526-40f4-8474-5ca1920b760f",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"* The Github search API provides up to 1,000 results for each search.\n",
|
|
"\n",
|
|
"* Please visit [this link](https://docs.github.com/en/rest/search#about-the-search-api) to know more about Github search limitation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a95c818-e23e-4690-867c-e3ecd332dcf6",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Query number of repositories with stars greater than the given threshold\n",
|
|
"threshold = 500 # provides list of repos with stars greater than 500\n",
|
|
"\n",
|
|
"# Setup how many top repository results are to be shown\n",
|
|
"top_n = 250\n",
|
|
"\n",
|
|
"# if you want to fetch all the repository results with the\n",
|
|
"# given threshold instead of top_n number, then put in top_n value to 'all'\n",
|
|
"\n",
|
|
"# Github token\n",
|
|
"GITHUB_TOKEN = None or naas.secret.get(\"GITHUB_TOKEN\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3f0db106-0361-4672-ba06-2d9f6525be08",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"## Model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "85edc940-b57b-425d-a693-6f7942ddbe7b",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"### Get most starred repos"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "824580df-24f1-4f97-b9a3-818ad478a312",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def fetch_results(top_n, threshold, token):\n",
|
|
" URL = (\n",
|
|
" f\"https://api.github.com/search/repositories?q=stars:%3E{threshold}&sort=stars\"\n",
|
|
" )\n",
|
|
" headers = {\"Authorization\": f\"token {token}\"}\n",
|
|
" df = pd.DataFrame()\n",
|
|
" cnt, page = 0, 1\n",
|
|
"\n",
|
|
" while True:\n",
|
|
" params = {\n",
|
|
" \"state\": \"open\",\n",
|
|
" \"per_page\": \"100\",\n",
|
|
" \"page\": page,\n",
|
|
" }\n",
|
|
" res = requests.get(URL, headers=headers, params=params)\n",
|
|
" try:\n",
|
|
" res.raise_for_status()\n",
|
|
" except requests.HTTPError as e:\n",
|
|
" if \"422 Client Error: Unprocessable Entity for url:\" in str(e):\n",
|
|
" print(\"Github Search API limit reached!\")\n",
|
|
" print(\"Collecting the search results\")\n",
|
|
" break\n",
|
|
" res_json = res.json()\n",
|
|
"\n",
|
|
" for r in res_json[\"items\"]:\n",
|
|
" df.loc[cnt, \"repo_id\"] = r[\"id\"]\n",
|
|
" df.loc[cnt, \"name\"], df.loc[cnt, \"url\"] = r[\"name\"], r[\"html_url\"]\n",
|
|
" df.loc[cnt, \"stars\"], df.loc[cnt, \"forks\"], df.loc[cnt, \"issues_open\"] = (\n",
|
|
" r[\"watchers\"],\n",
|
|
" r[\"forks\"],\n",
|
|
" r[\"open_issues\"],\n",
|
|
" )\n",
|
|
" df.loc[cnt, \"created_at\"], df.loc[cnt, \"updated_at\"] = (\n",
|
|
" r[\"created_at\"],\n",
|
|
" r[\"updated_at\"],\n",
|
|
" )\n",
|
|
" if len(r[\"topics\"]):\n",
|
|
" df.loc[cnt, \"topics\"] = \",\".join(r[\"topics\"])\n",
|
|
" else:\n",
|
|
" df.loc[cnt, \"topics\"] = \"None\"\n",
|
|
"\n",
|
|
" if r[\"description\"]:\n",
|
|
" df.loc[cnt, \"description\"] = r[\"description\"]\n",
|
|
" else:\n",
|
|
" df.loc[cnt, \"description\"] = \"None\"\n",
|
|
"\n",
|
|
" cnt += 1\n",
|
|
" if cnt == top_n:\n",
|
|
" break\n",
|
|
"\n",
|
|
" if cnt == top_n:\n",
|
|
" break\n",
|
|
"\n",
|
|
" page += 1\n",
|
|
"\n",
|
|
" df.stars, df.forks, df.issues_open, df.repo_id = (\n",
|
|
" df.stars.astype(\"int\"),\n",
|
|
" df.forks.astype(\"int\"),\n",
|
|
" df.issues_open.astype(\"int\"),\n",
|
|
" df.repo_id.astype(\"int\"),\n",
|
|
" )\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"df_results = fetch_results(top_n, threshold, GITHUB_TOKEN)\n",
|
|
"df_results.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ca4b723e-eb05-42d1-9882-3ab83279573b",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"## Output"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "443e61ad-8c87-418b-8752-4dc448458e38",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"### Display result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5de75d14-bcdd-43f2-b779-9b09aaf59b1f",
|
|
"metadata": {
|
|
"papermill": {},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_results.head(10)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.6"
|
|
},
|
|
"naas": {
|
|
"notebook_id": "f8b2a5b06ad0b1bc4d25de7e3aa1130a926d75e334885b7f4ad7abe3b774087d",
|
|
"notebook_path": "GitHub/GitHub_Get_most_starred_repos.ipynb"
|
|
},
|
|
"papermill": {
|
|
"default_parameters": {},
|
|
"environment_variables": {},
|
|
"parameters": {},
|
|
"version": "2.3.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |