1
0
mirror of https://github.com/deepset-ai/haystack.git synced 2022-02-20 23:31:40 +03:00

Allow Linux CI to push changes to forks (#2182)

* Add explicit reference to repo name to allow CI to push code back

* Run test matrix only on tested code changes

* Isolate the bot to check if it works

* Clarify situation with a comment

* Simplify autoformat.yml

* Add code and docs check

* Add git pull to make sure to fetch changes if they were created

* Add cache to autoformat.yml too

* Add information on forks in CONTRIBUTING.md

* Add a note about code quality tools in CONTRIBUTING.md

* Add image file types to the CI exclusion list

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan
2022-02-16 16:28:55 +01:00
committed by GitHub
parent b0d82e9fbb
commit 4e940be859
6 changed files with 269 additions and 127 deletions

90
.github/workflows/autoformat.yml vendored Normal file
View File

@@ -0,0 +1,90 @@
name: Code & Documentation Updates

on:
  # Activate this workflow manually
  workflow_dispatch:

  # Activate this workflow at every push of code changes
  # Note: using push instead of pull_request makes the actions
  # run on the contributor's repo instead of Haystack's.
  # This is necessary for permission issues: Haystack's CI runners
  # cannot push changes back to the source fork.
  # TODO make sure this is still necessary later on.
  push:
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'

jobs:
  code-and-docs-updates:
    runs-on: ubuntu-latest
    steps:
      # FIX: the cache key below interpolates ${{ env.date }}, but nothing in
      # this workflow set it, so the key silently degraded to an empty segment
      # and the cache never rotated daily. Set it first, as ci.yml does.
      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: 3.7

      - name: Cache
        id: cache-python-env
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          # The cache will be rebuilt every day and at every change of the dependency files
          key: haystack-ci-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Install Dependencies
        run: |
          pip install --upgrade pip
          pip install .[test]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
          echo "=== pip freeze ==="
          pip freeze

      # Apply Black on the entire codebase
      - name: Blacken
        run: black .

      # Convert the Jupyter notebooks into markdown tutorials
      - name: Generate Tutorials
        run: |
          cd docs/_src/tutorials/tutorials/
          python3 convert_ipynb.py

      # Generate markdown files from the docstrings with pydoc-markdown
      - name: Generate Docstrings
        run: |
          set -e   # Fails on any error in the following loop
          cd docs/_src/api/api/
          for file in ../pydoc/* ; do
            echo "Processing" $file
            pydoc-markdown "$file"
          done

      # Generates the OpenAPI specs file to be used on the documentation website
      - name: Generate OpenAPI Specs
        run: |
          pip install rest_api/
          cd docs/_src/api/openapi/
          python generate_openapi_specs.py

      # Generates a new JSON schema for the pipeline YAML validation
      - name: Generate JSON schema for pipelines
        run: python ./.github/utils/generate_json_schema.py

      # Commit the files to GitHub
      - name: Commit files
        run: |
          git status
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add .
          git commit -m "Update Documentation & Code Style" -a || echo "No changes to commit"
          git push

View File

@@ -3,18 +3,60 @@ name: Linux CI
on:
# Activate this workflow manually
workflow_dispatch:
# Activate this workflow on every update of a PR
# Activate this workflow when the PR is opened and code is added to it
# Note: using pull_request instead of push to keep the CI workflows
# running on our repo, not the contributor's. See autoformat.yml
pull_request:
types:
- opened
- synchronize
# Activate this workflow on every push to master
paths-ignore:
- '**/*.md'
- '**/*.txt'
- '**/*.png'
- '**/*.gif'
# Activate this workflow on every push of code changes on master
push:
branches:
- master
paths-ignore:
- '**/*.md'
- '**/*.txt'
- '**/*.png'
- '**/*.gif'
jobs:
build-cache:
runs-on: ubuntu-20.04
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache
id: cache-python-env
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
# The cache will be rebuilt every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
type-check:
runs-on: ubuntu-20.04
steps:
@@ -51,119 +93,7 @@ jobs:
mypy ui --exclude=ui/build/ --exclude=ui/test/
build-cache:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.7
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Cache
id: cache-python-env
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
# The cache will be rebuild every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
code-and-docs-updates:
needs: build-cache
runs-on: ubuntu-latest
if: ${{ github.event_name }} != "push"
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{github.event.pull_request.head.ref}}
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache Python
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Apply Black on the entire codebase
- name: Blacken
run: black .
# Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials
run: |
cd docs/_src/tutorials/tutorials/
python3 convert_ipynb.py
# Generate markdown files from the docstrings with pydoc-markdown
- name: Generate Docstrings
run: |
set -e # Fails on any error in the following loop
cd docs/_src/api/api/
for file in ../pydoc/* ; do
echo "Processing" $file
pydoc-markdown "$file"
done
# Generates the OpenAPI specs file to be used on the documentation website
- name: Generate OpenAPI Specs
run: |
pip install rest_api/
cd docs/_src/api/openapi/
python generate_openapi_specs.py
# Generates a new JSON schema for the pipeline YAML validation
- name: Generate JSON schema for pipelines
run: python ./.github/utils/generate_json_schema.py
# Commit the files to GitHub
- name: Commit files
run: |
git status
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add .
git commit -m "Update Documentation & Code Style" -a || echo "No changes to commit"
git status
git push
linter:
needs: code-and-docs-updates
runs-on: ubuntu-20.04
steps:
@@ -198,6 +128,94 @@ jobs:
pylint -ry rest_api/
pylint -ry ui/
code-and-docs-check:
needs: build-cache
runs-on: ubuntu-latest
if: ${{ github.event_name }} != "push"
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
with:
fetch-depth: 0
repository: ${{github.event.pull_request.head.repo.full_name}}
ref: ${{ github.head_ref }}
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache Python
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Get any additional commit that might have been pushed in the meantime
- name: Pull changes (if any)
run: git pull origin ${{ github.head_ref }}
# Apply Black on the entire codebase
- name: Blacken
run: black .
# Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials
run: |
cd docs/_src/tutorials/tutorials/
python3 convert_ipynb.py
# Generate markdown files from the docstrings with pydoc-markdown
- name: Generate Docstrings
run: |
set -e # Fails on any error in the following loop
cd docs/_src/api/api/
for file in ../pydoc/* ; do
echo "Processing" $file
pydoc-markdown "$file"
done
# Generates the OpenAPI specs file to be used on the documentation website
- name: Generate OpenAPI Specs
run: |
pip install rest_api/
cd docs/_src/api/openapi/
python generate_openapi_specs.py
# Generates a new JSON schema for the pipeline YAML validation
- name: Generate JSON schema for pipelines
run: python ./.github/utils/generate_json_schema.py
# If there is anything to commit, fail
# Note: this CI action mirrors autoformat.yml, with the difference that it
# runs on Haystack's end. If the contributor hasn't run autoformat.yml, then this
# check will fail.
- name: Check git status
run: |
if [[ `git status --porcelain` ]]; then
git status
echo ""
echo "This means that the `autoformat.yml` action didn't run on the fork."
echo "Please enable GitHub Action on your fork to pass this check!"
exit 1
fi
prepare-matrix:
needs: build-cache

View File

@@ -20,7 +20,17 @@ Please give a concise description in the first comment in the PR that includes:
## Running tests
### CI
Tests will automatically run in our CI for every commit you push to your PR. This is the most convenient way for you and we encourage you to create early "WIP Pull requests".
Tests will automatically run in our CI for every commit you push to your PR. This is the most convenient way for you and we encourage you to create early "draft pull requests".
#### Forks
Some actions in our CI (code style and documentation updates) will run on your code and occasionally commit back small changes after a push. To be able to do so,
these actions are configured to run on your fork instead of on the base repository. To allow those actions to run, please don't forget to:
1. Enable actions on your fork with read and write permissions:
<p align="center"><img src="https://raw.githubusercontent.com/deepset-ai/haystack/master/docs/_src/img/fork_action_config.png"></p>
2. Verify that "Allow edits and access to secrets by maintainers" on the PR page's sidebar is checked
### Local
However, you can also run the tests locally by executing pytest in your terminal from the `/test` folder.
@@ -119,6 +129,30 @@ def test_elasticsearch_custom_fields(elasticsearch_fixture):
embedding_field="custom_embedding_field")
```
## Code format and style
We use [Black](https://github.com/psf/black) to ensure consistent code style, [mypy](http://mypy-lang.org/) for static type checking and
[pylint](https://pylint.org/) for linting and code quality.
All checks and autoformatting happen on the CI, so in general you don't need to worry about configuring them in your local environment.
However, should you prefer to execute them locally, here are a few details about the setup.
### Black
Black runs with no other configuration than an increased line length of 120 characters. Its configuration can be found in `pyproject.toml`.
You can run it with `python -m black .` from the root folder.
### Mypy
Mypy currently runs with limited configuration options that can be found at the bottom of `setup.cfg`.
You can run it with `python -m mypy haystack/ rest_api/ ui/` from the root folder.
### Pylint
Pylint is still being integrated in Haystack. The current exclusion list is very long, and can be found in `pyproject.toml`.
You can run it with `python -m pylint haystack/ rest_api/ ui/ -ry` from the root folder.
## Contributor Licence Agreement (CLA)
Significant contributions to Haystack require a Contributor License Agreement (CLA). If the contribution requires a CLA, we will get in contact with you. CLAs are quite common among company backed open-source frameworks and our CLAs wording is similar to other popular projects, like [Rasa](https://cla-assistant.io/RasaHQ/rasa) or [Google's Tensorflow](https://cla.developers.google.com/clas/new?domain=DOMAIN_GOOGLE&kind=KIND_INDIVIDUAL) (retrieved 4th November 2021).

View File

@@ -93,7 +93,7 @@
"feedback"
],
"summary": "Get Feedback",
"description": "This endpoint allows the API user to retrieve all the\nfeedback that has been sumbitted through the\n`POST /feedback` endpoint",
"description": "This endpoint allows the API user to retrieve all the feedback that has been submitted\nthrough the `POST /feedback` endpoint.",
"operationId": "get_feedback_feedback_get",
"responses": {
"200": {
@@ -111,7 +111,7 @@
"feedback"
],
"summary": "Post Feedback",
"description": "This endpoint allows the API user to submit feedback on\nan answer for a particular query. For example, the user\ncan send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\nInformation submitted through this endpoint is used to\ntrain the underlying QA model.",
"description": "This endpoint allows the API user to submit feedback on an answer for a particular query.\n\nFor example, the user can send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\n\nInformation submitted through this endpoint is used to train the underlying QA model.",
"operationId": "post_feedback_feedback_post",
"requestBody": {
"content": {
@@ -177,7 +177,7 @@
"feedback"
],
"summary": "Get Feedback Metrics",
"description": "This endpoint returns basic accuracy metrics based on user feedback,\ne.g., the ratio of correct answers or correctly identified documents.\nYou can filter the output by document or label.\n\nExample:\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`",
"description": "This endpoint returns basic accuracy metrics based on user feedback,\ne.g., the ratio of correct answers or correctly identified documents.\nYou can filter the output by document or label.\n\nExample:\n\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`",
"operationId": "get_feedback_metrics_eval_feedback_post",
"requestBody": {
"content": {

Binary file not shown.

After

Width:  |  Height:  |  Size: 220 KiB

View File

@@ -8,7 +8,6 @@ from haystack.schema import Label
from rest_api.schema import FilterRequest, LabelSerialized, CreateLabelSerialized
from rest_api.controller.search import DOCUMENT_STORE
router = APIRouter()
logger = logging.getLogger(__name__)
@@ -17,13 +16,14 @@ logger = logging.getLogger(__name__)
@router.post("/feedback")
def post_feedback(feedback: Union[LabelSerialized, CreateLabelSerialized]):
"""
This endpoint allows the API user to submit feedback on
an answer for a particular query. For example, the user
can send feedback on whether the answer was correct and
This endpoint allows the API user to submit feedback on an answer for a particular query.
For example, the user can send feedback on whether the answer was correct and
whether the right snippet was identified as the answer.
Information submitted through this endpoint is used to
train the underlying QA model.
Information submitted through this endpoint is used to train the underlying QA model.
"""
if feedback.origin is None:
feedback.origin = "user-feedback"
@@ -34,9 +34,8 @@ def post_feedback(feedback: Union[LabelSerialized, CreateLabelSerialized]):
@router.get("/feedback")
def get_feedback():
"""
This endpoint allows the API user to retrieve all the
feedback that has been sumbitted through the
`POST /feedback` endpoint
This endpoint allows the API user to retrieve all the feedback that has been submitted
through the `POST /feedback` endpoint.
"""
labels = DOCUMENT_STORE.get_all_labels()
return labels
@@ -62,6 +61,7 @@ def get_feedback_metrics(filters: FilterRequest = None):
You can filter the output by document or label.
Example:
`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' \
--header 'Content-Type: application/json' \
--data-raw '{ "filters": {"document_id": ["XRR3xnEBCYVTkbTystOB"]} }'`