Merge branch 'main' into feature-enable-prompt

Authored by Ahmet Oner; committed by GitHub on 2023-02-04 23:37:33 +01:00
6 changed files with 862 additions and 659 deletions

Dockerfile

@@ -1,6 +1,5 @@
-FROM python:3.9.9-slim
+FROM python:3.11-slim
-ENV POETRY_VERSION=1.2.0
 ENV POETRY_VENV=/app/.venv
 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -11,11 +10,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
-ARG TARGETPLATFORM
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0 -f https://download.pytorch.org/whl/cpu; fi;
-RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0; fi;
+    && $POETRY_VENV/bin/pip install poetry
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
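With the TARGETPLATFORM-specific torch installs dropped (torch now arrives through the project's Poetry dependencies), building the CPU image is a plain docker build. A minimal sketch, assuming the image is built from the repository root; the tag name is illustrative, and port 9000 is where the webservice listens:

```sh
# Build the CPU image from the repository root (tag name is illustrative)
docker build -t whisper-asr-webservice:cpu .

# Run it and expose the webservice on port 9000
docker run -d -p 9000:9000 whisper-asr-webservice:cpu
```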

Dockerfile.gpu

@@ -1,7 +1,6 @@
-FROM nvidia/cuda:11.7.0-base-ubuntu20.04
+FROM nvidia/cuda:11.7.0-base-ubuntu22.04
-ENV PYTHON_VERSION=3.9
-ENV POETRY_VERSION=1.2.0
+ENV PYTHON_VERSION=3.11
 ENV POETRY_VENV=/app/.venv
 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -19,8 +18,7 @@ RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION} \
-    && $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
+    && $POETRY_VENV/bin/pip install poetry
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
@@ -31,4 +29,6 @@ COPY . /app
 RUN poetry config virtualenvs.in-project true
 RUN poetry install
+RUN $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
 ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:9000", "--workers", "1", "--timeout", "0", "app.webservice:app", "-k", "uvicorn.workers.UvicornWorker"]
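The CUDA build of torch is now installed into the Poetry virtualenv after `poetry install`, overriding the CPU wheel that the `torch` entry in pyproject.toml would otherwise provide. A rough usage sketch, assuming this file is kept as Dockerfile.gpu and the NVIDIA container toolkit is present on the host (both assumptions):

```sh
# Build the GPU image (the Dockerfile.gpu path is an assumption)
docker build -f Dockerfile.gpu -t whisper-asr-webservice:gpu .

# Run with GPU access; --gpus requires the NVIDIA container toolkit
docker run -d --gpus all -p 9000:9000 whisper-asr-webservice:gpu
```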

README.md

@@ -43,15 +43,13 @@ For English-only applications, the `.en` models tend to perform better, especial
 Install poetry with following command:
 ```sh
-pip3 install poetry==1.2.2
+pip3 install poetry
 ```
 Install torch with following command:
 ```sh
-# for cpu:
-pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch
-# for gpu:
+# just for GPU:
 pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
 ```
@@ -64,16 +62,16 @@ poetry install
 Starting the Webservice:
 ```sh
-gunicorn --bind 0.0.0.0:9001 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
+poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
 ```
 ## Quick start
-After running the docker image or `poetry run whisper_asr` interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
+After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
 There are 2 endpoints available:
-- /asr (JSON, SRT, VTT)
+- /asr (TXT, VTT, SRT, TSV, JSON)
 - /detect-language
 ## Automatic Speech recognition service /asr
@@ -82,7 +80,7 @@ If you choose the **transcribe** task, transcribes the uploaded file. Both audio
 Note that you can also upload video formats directly as long as they are supported by ffmpeg.
-You can get SRT and VTT output as a file from /asr endpoint.
+You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint.
 You can provide the language or it will be automatically recognized.
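To make the expanded output formats concrete, here is a hedged example of calling both endpoints with curl once the service is listening on port 9000. The `audio_file` form field and the `task`/`output` query parameters match the handler in app/webservice.py below; the sample file name is illustrative:

```sh
# Upload an audio file and download the transcript as SRT (sample.mp3 is illustrative)
curl -X POST "http://localhost:9000/asr?task=transcribe&output=srt" \
  -F "audio_file=@sample.mp3" -o sample.srt

# Ask the service which language is spoken in the same file
curl -X POST "http://localhost:9000/detect-language" \
  -F "audio_file=@sample.mp3"
```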

app/webservice.py

@@ -3,7 +3,7 @@ from fastapi.responses import StreamingResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.openapi.docs import get_swagger_ui_html
 import whisper
-from whisper.utils import write_srt, write_vtt
+from whisper.utils import ResultWriter, WriteTXT, WriteSRT, WriteVTT, WriteTSV, WriteJSON
 from whisper import tokenizer
 import os
 from os import path
@@ -65,25 +65,27 @@
     task : Union[str, None] = Query(default="transcribe", enum=["transcribe", "translate"]),
     language: Union[str, None] = Query(default=None, enum=LANGUAGE_CODES),
     prompt: Union[str, None] = Query(default=None),
-    output : Union[str, None] = Query(default="json", enum=["json", "vtt", "srt"]),
+    output : Union[str, None] = Query(default="txt", enum=[ "txt", "vtt", "srt", "tsv", "json"]),
 ):
-    result = run_asr(audio_file.file, task, language)
+    result = run_asr(audio_file.file, task, language, prompt)
     filename = audio_file.filename.split('.')[0]
+    myFile = StringIO()
     if(output == "srt"):
-        srt_file = StringIO()
-        write_srt(result["segments"], file = srt_file)
-        srt_file.seek(0)
-        return StreamingResponse(srt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.srt"'})
+        WriteSRT(ResultWriter).write_result(result, file = myFile)
     elif(output == "vtt"):
-        vtt_file = StringIO()
-        write_vtt(result["segments"], file = vtt_file)
-        vtt_file.seek(0)
-        return StreamingResponse(vtt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.vtt"'})
+        WriteVTT(ResultWriter).write_result(result, file = myFile)
+    elif(output == "tsv"):
+        WriteTSV(ResultWriter).write_result(result, file = myFile)
+    elif(output == "json"):
+        WriteJSON(ResultWriter).write_result(result, file = myFile)
+    elif(output == "txt"):
+        WriteTXT(ResultWriter).write_result(result, file = myFile)
     else:
-        return result
+        return 'Please select an output method!'
+    myFile.seek(0)
+    return StreamingResponse(myFile, media_type="text/plain",
+                             headers={'Content-Disposition': f'attachment; filename="{filename}.{output}"'})
 @app.post("/detect-language", tags=["Endpoints"])
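The merged handler now forwards `prompt` to `run_asr` and streams whichever writer matches the `output` parameter. A sketch of exercising both changes over HTTP, assuming the prompt is passed through to Whisper as an initial prompt; the file name and prompt text are illustrative:

```sh
# Bias the transcription with a URL-encoded prompt and fetch the new TSV format
curl -X POST "http://localhost:9000/asr?output=tsv&prompt=Whisper%20ASR%20Webservice" \
  -F "audio_file=@sample.mp3" -o sample.tsv
```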

poetry.lock (generated): 1451 changed lines

File diff suppressed because it is too large.
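The regenerated lockfile simply reflects the dependency edits in pyproject.toml below; assuming Poetry is installed, it can be reproduced with:

```sh
# Re-resolve dependencies and rewrite poetry.lock after editing pyproject.toml
poetry lock
```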

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "whisper-asr-webservice"
-version = "1.0.5"
+version = "1.0.6"
 description = "Whisper ASR Webservice is a general-purpose speech recognition webservice."
 homepage = "https://github.com/ahmetoner/whisper-asr-webservice/"
 license = "https://github.com/ahmetoner/whisper-asr-webservice/blob/main/LICENCE"
@@ -12,17 +12,18 @@ readme = "README.md"
 packages = [{ include = "app" }]
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.11"
 unidecode = "^1.3.4"
 uvicorn = { extras = ["standard"], version = "^0.18.2" }
 gunicorn = "^20.1.0"
-whisper = {git = "https://github.com/openai/whisper.git", rev="b9265e5796f5d80c18d1f9231ab234225676780b"}
+openai-whisper = {git = "https://github.com/openai/whisper.git", rev="7858aa9c08d98f75575035ecd6481f462d66ca27"}
 tqdm = "^4.64.1"
 transformers = "^4.22.1"
 python-multipart = "^0.0.5"
 ffmpeg-python = "^0.2.0"
 fastapi = "^0.85.0"
 fastapi-offline-swagger-ui = {git = "https://github.com/ahmetoner/fastapi-offline-swagger-ui"}
+torch="^1.13.0"
 [tool.poetry.dev-dependencies]
 pytest = "^6.2.5"
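Tying the manifest changes together, a hedged sketch of the local workflow they imply: Python 3.11, an unpinned Poetry, and torch pulled in by `poetry install`. The commands mirror the README section above:

```sh
# Local setup under the new constraints (assumes Python 3.11 on PATH)
pip3 install poetry
poetry install
poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
```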