1
0
mirror of https://github.com/deepset-ai/haystack.git synced 2022-02-20 23:31:40 +03:00

Allow Linux CI to push changes to forks (#2182)

* Add explicit reference to repo name to allow CI to push code back

* Run test matrix only on tested code changes

* Isolate the bot to check if it works

* Clarify situation with a comment

* Simplify autoformat.yml

* Add code and docs check

* Add git pull to make sure to fetch changes if they were created

* Add cache to autoformat.yml too

* Add information on forks in CONTRIBUTING.md

* Add a note about code quality tools in CONTRIBUTING.md

* Add image file types to the CI exclusion list

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan
2022-02-16 16:28:55 +01:00
committed by GitHub
parent b0d82e9fbb
commit 4e940be859
6 changed files with 269 additions and 127 deletions

90
.github/workflows/autoformat.yml vendored Normal file
View File

@@ -0,0 +1,90 @@
name: Code & Documentation Updates

on:
  # Activate this workflow manually
  workflow_dispatch:

  # Activate this workflow at every push of code changes
  # Note: using push instead of pull_request makes the actions
  # run on the contributor's repo instead of Haystack's.
  # This is necessary for permission issues: Haystack's CI runners
  # cannot push changes back to the source fork.
  # TODO make sure this is still necessary later on.
  push:
    paths-ignore:
      - '**/*.md'
      - '**/*.txt'

jobs:
  code-and-docs-updates:
    runs-on: ubuntu-latest
    steps:
      # FIX: the cache key below interpolates ${{ env.date }}, but nothing in
      # this workflow set it, so the key silently degraded to an empty segment
      # and the cache never rotated daily. Set it first, as ci.yml does.
      - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: 3.7

      - name: Cache
        id: cache-python-env
        uses: actions/cache@v2
        with:
          path: ${{ env.pythonLocation }}
          # The cache will be rebuilt every day and at every change of the dependency files
          key: haystack-ci-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}

      - name: Install Dependencies
        run: |
          pip install --upgrade pip
          pip install .[test]
          pip install rest_api/
          pip install ui/
          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
          echo "=== pip freeze ==="
          pip freeze

      # Apply Black on the entire codebase
      - name: Blacken
        run: black .

      # Convert the Jupyter notebooks into markdown tutorials
      - name: Generate Tutorials
        run: |
          cd docs/_src/tutorials/tutorials/
          python3 convert_ipynb.py

      # Generate markdown files from the docstrings with pydoc-markdown
      - name: Generate Docstrings
        run: |
          set -e   # Fails on any error in the following loop
          cd docs/_src/api/api/
          for file in ../pydoc/* ; do
            echo "Processing" $file
            pydoc-markdown "$file"
          done

      # Generates the OpenAPI specs file to be used on the documentation website
      - name: Generate OpenAPI Specs
        run: |
          pip install rest_api/
          cd docs/_src/api/openapi/
          python generate_openapi_specs.py

      # Generates a new JSON schema for the pipeline YAML validation
      - name: Generate JSON schema for pipelines
        run: python ./.github/utils/generate_json_schema.py

      # Commit the files to GitHub
      - name: Commit files
        run: |
          git status
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add .
          git commit -m "Update Documentation & Code Style" -a || echo "No changes to commit"
          git push

View File

@@ -3,18 +3,60 @@ name: Linux CI
on:
# Activate this workflow manually
workflow_dispatch:
# Activate this workflow on every update of a PR
# Activate this workflow when the PR is opened and code is added to it
# Note: using pull_request instead of push to keep the CI workflows
# running on our repo, not the contributor's. See autoformat.yml
pull_request:
types:
- opened
- synchronize
# Activate this workflow on every push to master
paths-ignore:
- '**/*.md'
- '**/*.txt'
- '**/*.png'
- '**/*.gif'
# Activate this workflow on every push of code changes on master
push:
branches:
- master
paths-ignore:
- '**/*.md'
- '**/*.txt'
- '**/*.png'
- '**/*.gif'
jobs:
build-cache:
runs-on: ubuntu-20.04
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache
id: cache-python-env
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
# The cache will be rebuilt every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
type-check:
runs-on: ubuntu-20.04
steps:
@@ -51,119 +93,7 @@ jobs:
mypy ui --exclude=ui/build/ --exclude=ui/test/
build-cache:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.7
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Cache
id: cache-python-env
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
# The cache will be rebuild every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
code-and-docs-updates:
needs: build-cache
runs-on: ubuntu-latest
if: ${{ github.event_name }} != "push"
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{github.event.pull_request.head.ref}}
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache Python
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Apply Black on the entire codebase
- name: Blacken
run: black .
# Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials
run: |
cd docs/_src/tutorials/tutorials/
python3 convert_ipynb.py
# Generate markdown files from the docstrings with pydoc-markdown
- name: Generate Docstrings
run: |
set -e # Fails on any error in the following loop
cd docs/_src/api/api/
for file in ../pydoc/* ; do
echo "Processing" $file
pydoc-markdown "$file"
done
# Generates the OpenAPI specs file to be used on the documentation website
- name: Generate OpenAPI Specs
run: |
pip install rest_api/
cd docs/_src/api/openapi/
python generate_openapi_specs.py
# Generates a new JSON schema for the pipeline YAML validation
- name: Generate JSON schema for pipelines
run: python ./.github/utils/generate_json_schema.py
# Commit the files to GitHub
- name: Commit files
run: |
git status
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add .
git commit -m "Update Documentation & Code Style" -a || echo "No changes to commit"
git status
git push
linter:
needs: code-and-docs-updates
runs-on: ubuntu-20.04
steps:
@@ -198,6 +128,94 @@ jobs:
pylint -ry rest_api/
pylint -ry ui/
code-and-docs-check:
needs: build-cache
runs-on: ubuntu-latest
if: ${{ github.event_name }} != "push"
steps:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/checkout@v2
with:
fetch-depth: 0
repository: ${{github.event.pull_request.head.repo.full_name}}
ref: ${{ github.head_ref }}
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Cache Python
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Get any additional commit that might have been pushed in the meantime
- name: Pull changes (if any)
run: git pull origin ${{ github.head_ref }}
# Apply Black on the entire codebase
- name: Blacken
run: black .
# Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials
run: |
cd docs/_src/tutorials/tutorials/
python3 convert_ipynb.py
# Generate markdown files from the docstrings with pydoc-markdown
- name: Generate Docstrings
run: |
set -e # Fails on any error in the following loop
cd docs/_src/api/api/
for file in ../pydoc/* ; do
echo "Processing" $file
pydoc-markdown "$file"
done
# Generates the OpenAPI specs file to be used on the documentation website
- name: Generate OpenAPI Specs
run: |
pip install rest_api/
cd docs/_src/api/openapi/
python generate_openapi_specs.py
# Generates a new JSON schema for the pipeline YAML validation
- name: Generate JSON schema for pipelines
run: python ./.github/utils/generate_json_schema.py
# If there is anything to commit, fail
# Note: this CI action mirrors autoformat.yml, with the difference that it
# runs on Haystack's end. If the contributor hasn't run autoformat.yml, then this
# check will fail.
- name: Check git status
run: |
if [[ `git status --porcelain` ]]; then
git status
echo ""
echo "This means that the `autoformat.yml` action didn't run on the fork."
echo "Please enable GitHub Action on your fork to pass this check!"
exit 1
fi
prepare-matrix:
needs: build-cache

View File

@@ -20,7 +20,17 @@ Please give a concise description in the first comment in the PR that includes:
## Running tests
### CI
Tests will automatically run in our CI for every commit you push to your PR. This is the most convenient way for you and we encourage you to create early "WIP Pull requests".
Tests will automatically run in our CI for every commit you push to your PR. This is the most convenient way for you and we encourage you to create early "draft pull requests".
#### Forks
Some actions in our CI (code style and documentation updates) will run on your code and occasionally commit back small changes after a push. To be able to do so,
these actions are configured to run on your fork instead of on the base repository. To allow those actions to run, please don't forget to:
1. Enable actions on your fork with read and write permissions:
<p align="center"><img src="https://raw.githubusercontent.com/deepset-ai/haystack/master/docs/_src/img/fork_action_config.png"></p>
2. Verify that "Allow edits and access to secrets by maintainers" on the PR page's sidebar is checked
### Local
However, you can also run the tests locally by executing pytest in your terminal from the `/test` folder.
@@ -119,6 +129,30 @@ def test_elasticsearch_custom_fields(elasticsearch_fixture):
embedding_field="custom_embedding_field")
```
## Code format and style
We use [Black](https://github.com/psf/black) to ensure consistent code style, [mypy](http://mypy-lang.org/) for static type checking and
[pylint](https://pylint.org/) for linting and code quality.
All checks and autoformatting happen on the CI, so in general you don't need to worry about configuring them in your local environment.
However, should you prefer to execute them locally, here are a few details about the setup.
### Black
Black runs with no other configuration than an increased line length of 120 characters. Its configuration can be found in `pyproject.toml`.
You can run it with `python -m black .` from the root folder.
### Mypy
Mypy currently runs with limited configuration options that can be found at the bottom of `setup.cfg`.
You can run it with `python -m mypy haystack/ rest_api/ ui/` from the root folder.
### Pylint
Pylint is still being integrated in Haystack. The current exclusion list is very long, and can be found in `pyproject.toml`.
You can run it with `python -m pylint haystack/ rest_api/ ui/ -ry` from the root folder.
## Contributor Licence Agreement (CLA)
Significant contributions to Haystack require a Contributor License Agreement (CLA). If the contribution requires a CLA, we will get in contact with you. CLAs are quite common among company backed open-source frameworks and our CLAs wording is similar to other popular projects, like [Rasa](https://cla-assistant.io/RasaHQ/rasa) or [Google's Tensorflow](https://cla.developers.google.com/clas/new?domain=DOMAIN_GOOGLE&kind=KIND_INDIVIDUAL) (retrieved 4th November 2021).

View File

@@ -93,7 +93,7 @@
"feedback"
],
"summary": "Get Feedback",
"description": "This endpoint allows the API user to retrieve all the\nfeedback that has been sumbitted through the\n`POST /feedback` endpoint",
"description": "This endpoint allows the API user to retrieve all the feedback that has been submitted\nthrough the `POST /feedback` endpoint.",
"operationId": "get_feedback_feedback_get",
"responses": {
"200": {
@@ -111,7 +111,7 @@
"feedback"
],
"summary": "Post Feedback",
"description": "This endpoint allows the API user to submit feedback on\nan answer for a particular query. For example, the user\ncan send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\nInformation submitted through this endpoint is used to\ntrain the underlying QA model.",
"description": "This endpoint allows the API user to submit feedback on an answer for a particular query.\n\nFor example, the user can send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\n\nInformation submitted through this endpoint is used to train the underlying QA model.",
"operationId": "post_feedback_feedback_post",
"requestBody": {
"content": {
@@ -177,7 +177,7 @@
"feedback"
],
"summary": "Get Feedback Metrics",
"description": "This endpoint returns basic accuracy metrics based on user feedback,\ne.g., the ratio of correct answers or correctly identified documents.\nYou can filter the output by document or label.\n\nExample:\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`",
"description": "This endpoint returns basic accuracy metrics based on user feedback,\ne.g., the ratio of correct answers or correctly identified documents.\nYou can filter the output by document or label.\n\nExample:\n\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`",
"operationId": "get_feedback_metrics_eval_feedback_post",
"requestBody": {
"content": {

Binary file not shown.

After

Width:  |  Height:  |  Size: 220 KiB

View File

@@ -8,7 +8,6 @@ from haystack.schema import Label
from rest_api.schema import FilterRequest, LabelSerialized, CreateLabelSerialized
from rest_api.controller.search import DOCUMENT_STORE
router = APIRouter()
logger = logging.getLogger(__name__)
@@ -17,13 +16,14 @@ logger = logging.getLogger(__name__)
@router.post("/feedback")
def post_feedback(feedback: Union[LabelSerialized, CreateLabelSerialized]):
"""
This endpoint allows the API user to submit feedback on
an answer for a particular query. For example, the user
can send feedback on whether the answer was correct and
This endpoint allows the API user to submit feedback on an answer for a particular query.
For example, the user can send feedback on whether the answer was correct and
whether the right snippet was identified as the answer.
Information submitted through this endpoint is used to
train the underlying QA model.
Information submitted through this endpoint is used to train the underlying QA model.
"""
if feedback.origin is None:
feedback.origin = "user-feedback"
@@ -34,9 +34,8 @@ def post_feedback(feedback: Union[LabelSerialized, CreateLabelSerialized]):
@router.get("/feedback")
def get_feedback():
"""
This endpoint allows the API user to retrieve all the
feedback that has been sumbitted through the
`POST /feedback` endpoint
This endpoint allows the API user to retrieve all the feedback that has been submitted
through the `POST /feedback` endpoint.
"""
labels = DOCUMENT_STORE.get_all_labels()
return labels
@@ -62,6 +61,7 @@ def get_feedback_metrics(filters: FilterRequest = None):
You can filter the output by document or label.
Example:
`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' \
--header 'Content-Type: application/json' \
--data-raw '{ "filters": {"document_id": ["XRR3xnEBCYVTkbTystOB"]} }'`