Merge branch 'main' into feature-enable-prompt

Authored by Ahmet Oner; committed by GitHub on 2023-02-04 23:37:33 +01:00
6 changed files with 862 additions and 659 deletions

Dockerfile

@@ -1,6 +1,5 @@
-FROM python:3.9.9-slim
+FROM python:3.11-slim
-ENV POETRY_VERSION=1.2.0
 ENV POETRY_VENV=/app/.venv
 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -11,11 +10,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
-ARG TARGETPLATFORM
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0 -f https://download.pytorch.org/whl/cpu; fi;
-RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0; fi;
+    && $POETRY_VENV/bin/pip install poetry
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
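With the TARGETPLATFORM-specific torch installs dropped (torch now arrives through the project's Poetry dependencies), building the CPU image is a plain docker build. A minimal sketch, assuming the image is built from the repository root; the tag name is illustrative, and port 9000 is where the webservice listens:

```sh
# Build the CPU image from the repository root (tag name is illustrative)
docker build -t whisper-asr-webservice:cpu .

# Run it and expose the webservice on port 9000
docker run -d -p 9000:9000 whisper-asr-webservice:cpu
```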

Dockerfile.gpu

@@ -1,7 +1,6 @@
-FROM nvidia/cuda:11.7.0-base-ubuntu20.04
+FROM nvidia/cuda:11.7.0-base-ubuntu22.04
-ENV PYTHON_VERSION=3.9
-ENV POETRY_VERSION=1.2.0
+ENV PYTHON_VERSION=3.11
 ENV POETRY_VENV=/app/.venv
 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -19,8 +18,7 @@ RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION} \
-    && $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
+    && $POETRY_VENV/bin/pip install poetry
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
@@ -31,4 +29,6 @@ COPY . /app
 RUN poetry config virtualenvs.in-project true
 RUN poetry install
+RUN $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
 ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:9000", "--workers", "1", "--timeout", "0", "app.webservice:app", "-k", "uvicorn.workers.UvicornWorker"]
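The CUDA build of torch is now installed into the Poetry virtualenv after `poetry install`, overriding the CPU wheel that the `torch` entry in pyproject.toml would otherwise provide. A rough usage sketch, assuming this file is kept as Dockerfile.gpu and the NVIDIA container toolkit is present on the host (both assumptions):

```sh
# Build the GPU image (the Dockerfile.gpu path is an assumption)
docker build -f Dockerfile.gpu -t whisper-asr-webservice:gpu .

# Run with GPU access; --gpus requires the NVIDIA container toolkit
docker run -d --gpus all -p 9000:9000 whisper-asr-webservice:gpu
```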

README.md

@@ -43,15 +43,13 @@ For English-only applications, the `.en` models tend to perform better, especial
 Install poetry with following command:
 ```sh
-pip3 install poetry==1.2.2
+pip3 install poetry
 ```
 Install torch with following command:
 ```sh
-# for cpu:
-pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch
-# for gpu:
+# just for GPU:
 pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
 ```
@@ -64,16 +62,16 @@ poetry install
 Starting the Webservice:
 ```sh
-gunicorn --bind 0.0.0.0:9001 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
+poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
 ```
 ## Quick start
-After running the docker image or `poetry run whisper_asr` interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
+After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
 There are 2 endpoints available:
-- /asr (JSON, SRT, VTT)
+- /asr (TXT, VTT, SRT, TSV, JSON)
 - /detect-language
 ## Automatic Speech recognition service /asr
@@ -82,7 +80,7 @@ If you choose the **transcribe** task, transcribes the uploaded file. Both audio
 Note that you can also upload video formats directly as long as they are supported by ffmpeg.
-You can get SRT and VTT output as a file from /asr endpoint.
+You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint.
 You can provide the language or it will be automatically recognized.
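To make the expanded output formats concrete, here is a hedged example of calling both endpoints with curl once the service is listening on port 9000. The `audio_file` form field and the `task`/`output` query parameters match the handler in app/webservice.py below; the sample file name is illustrative:

```sh
# Upload an audio file and download the transcript as SRT (sample.mp3 is illustrative)
curl -X POST "http://localhost:9000/asr?task=transcribe&output=srt" \
  -F "audio_file=@sample.mp3" -o sample.srt

# Ask the service which language is spoken in the same file
curl -X POST "http://localhost:9000/detect-language" \
  -F "audio_file=@sample.mp3"
```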

app/webservice.py

@@ -3,7 +3,7 @@ from fastapi.responses import StreamingResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.openapi.docs import get_swagger_ui_html
 import whisper
-from whisper.utils import write_srt, write_vtt
+from whisper.utils import ResultWriter, WriteTXT, WriteSRT, WriteVTT, WriteTSV, WriteJSON
 from whisper import tokenizer
 import os
 from os import path
@@ -65,25 +65,27 @@
     task : Union[str, None] = Query(default="transcribe", enum=["transcribe", "translate"]),
     language: Union[str, None] = Query(default=None, enum=LANGUAGE_CODES),
     prompt: Union[str, None] = Query(default=None),
-    output : Union[str, None] = Query(default="json", enum=["json", "vtt", "srt"]),
+    output : Union[str, None] = Query(default="txt", enum=[ "txt", "vtt", "srt", "tsv", "json"]),
 ):
-    result = run_asr(audio_file.file, task, language)
+    result = run_asr(audio_file.file, task, language, prompt)
     filename = audio_file.filename.split('.')[0]
+    myFile = StringIO()
     if(output == "srt"):
-        srt_file = StringIO()
-        write_srt(result["segments"], file = srt_file)
-        srt_file.seek(0)
-        return StreamingResponse(srt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.srt"'})
+        WriteSRT(ResultWriter).write_result(result, file = myFile)
     elif(output == "vtt"):
-        vtt_file = StringIO()
-        write_vtt(result["segments"], file = vtt_file)
-        vtt_file.seek(0)
-        return StreamingResponse(vtt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.vtt"'})
+        WriteVTT(ResultWriter).write_result(result, file = myFile)
+    elif(output == "tsv"):
+        WriteTSV(ResultWriter).write_result(result, file = myFile)
+    elif(output == "json"):
+        WriteJSON(ResultWriter).write_result(result, file = myFile)
+    elif(output == "txt"):
+        WriteTXT(ResultWriter).write_result(result, file = myFile)
     else:
-        return result
+        return 'Please select an output method!'
+    myFile.seek(0)
+    return StreamingResponse(myFile, media_type="text/plain",
+                             headers={'Content-Disposition': f'attachment; filename="{filename}.{output}"'})
 @app.post("/detect-language", tags=["Endpoints"])
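The merged handler now forwards `prompt` to `run_asr` and streams whichever writer matches the `output` parameter. A sketch of exercising both changes over HTTP, assuming the prompt is passed through to Whisper as an initial prompt; the file name and prompt text are illustrative:

```sh
# Bias the transcription with a URL-encoded prompt and fetch the new TSV format
curl -X POST "http://localhost:9000/asr?output=tsv&prompt=Whisper%20ASR%20Webservice" \
  -F "audio_file=@sample.mp3" -o sample.tsv
```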

poetry.lock (generated): 1451 changed lines

File diff suppressed because it is too large.
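The regenerated lockfile simply reflects the dependency edits in pyproject.toml below; assuming Poetry is installed, it can be reproduced with:

```sh
# Re-resolve dependencies and rewrite poetry.lock after editing pyproject.toml
poetry lock
```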

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "whisper-asr-webservice"
-version = "1.0.5"
+version = "1.0.6"
 description = "Whisper ASR Webservice is a general-purpose speech recognition webservice."
 homepage = "https://github.com/ahmetoner/whisper-asr-webservice/"
 license = "https://github.com/ahmetoner/whisper-asr-webservice/blob/main/LICENCE"
@@ -12,17 +12,18 @@ readme = "README.md"
 packages = [{ include = "app" }]
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.11"
 unidecode = "^1.3.4"
 uvicorn = { extras = ["standard"], version = "^0.18.2" }
 gunicorn = "^20.1.0"
-whisper = {git = "https://github.com/openai/whisper.git", rev="b9265e5796f5d80c18d1f9231ab234225676780b"}
+openai-whisper = {git = "https://github.com/openai/whisper.git", rev="7858aa9c08d98f75575035ecd6481f462d66ca27"}
 tqdm = "^4.64.1"
 transformers = "^4.22.1"
 python-multipart = "^0.0.5"
 ffmpeg-python = "^0.2.0"
 fastapi = "^0.85.0"
 fastapi-offline-swagger-ui = {git = "https://github.com/ahmetoner/fastapi-offline-swagger-ui"}
+torch="^1.13.0"
 [tool.poetry.dev-dependencies]
 pytest = "^6.2.5"
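Tying the manifest changes together, a hedged sketch of the local workflow they imply: Python 3.11, an unpinned Poetry, and torch pulled in by `poetry install`. The commands mirror the README section above:

```sh
# Local setup under the new constraints (assumes Python 3.11 on PATH)
pip3 install poetry
poetry install
poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
```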