Mirror of https://github.com/ahmetoner/whisper-asr-webservice.git (synced 2023-04-14 03:48:29 +03:00)

Merge branch 'main' into feature-enable-prompt

CPU Dockerfile:
@@ -1,6 +1,5 @@
-FROM python:3.9.9-slim
+FROM python:3.11-slim

-ENV POETRY_VERSION=1.2.0
 ENV POETRY_VENV=/app/.venv

 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -11,11 +10,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \

 RUN python3 -m venv $POETRY_VENV \
 && $POETRY_VENV/bin/pip install -U pip setuptools \
-&& $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
-
-ARG TARGETPLATFORM
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0 -f https://download.pytorch.org/whl/cpu; fi;
-RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then $POETRY_VENV/bin/pip install torch==1.13.0; fi;
+&& $POETRY_VENV/bin/pip install poetry

 ENV PATH="${PATH}:${POETRY_VENV}/bin"

GPU Dockerfile (nvidia/cuda base image):
@@ -1,7 +1,6 @@
-FROM nvidia/cuda:11.7.0-base-ubuntu20.04
+FROM nvidia/cuda:11.7.0-base-ubuntu22.04

-ENV PYTHON_VERSION=3.9
-ENV POETRY_VERSION=1.2.0
+ENV PYTHON_VERSION=3.11
 ENV POETRY_VENV=/app/.venv

 RUN export DEBIAN_FRONTEND=noninteractive \
@@ -19,8 +18,7 @@ RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \

 RUN python3 -m venv $POETRY_VENV \
 && $POETRY_VENV/bin/pip install -U pip setuptools \
-&& $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION} \
-&& $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
+&& $POETRY_VENV/bin/pip install poetry

 ENV PATH="${PATH}:${POETRY_VENV}/bin"
@@ -31,4 +29,6 @@ COPY . /app
 RUN poetry config virtualenvs.in-project true
 RUN poetry install

+RUN $POETRY_VENV/bin/pip install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
+
 ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:9000", "--workers", "1", "--timeout", "0", "app.webservice:app", "-k", "uvicorn.workers.UvicornWorker"]
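
With this change the GPU image no longer bakes torch into the Poetry bootstrap step; instead, `torch==1.13.0+cu117` is pip-installed into the project virtualenv after `poetry install`. The following is a small sanity-check sketch, not part of the commit, for verifying inside a running GPU container that the CUDA build of torch is the one on the path; it assumes only a standard PyTorch installation and uses the venv path from the Dockerfile (`/app/.venv`).

```python
# check_cuda.py -- hypothetical sanity check, not part of this commit.
# Run with the image's virtualenv interpreter, e.g. /app/.venv/bin/python.
import torch

print("torch version:", torch.__version__)          # expect a +cu117 build
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```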

README.md (14 changed lines):
@@ -43,15 +43,13 @@ For English-only applications, the `.en` models tend to perform better, especial
 Install poetry with following command:

 ```sh
-pip3 install poetry==1.2.2
+pip3 install poetry
 ```

 Install torch with following command:

 ```sh
-# for cpu:
-pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch
-# for gpu:
+# just for GPU:
 pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
 ```

@@ -64,16 +62,16 @@ poetry install
 Starting the Webservice:

 ```sh
-gunicorn --bind 0.0.0.0:9001 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
+poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
 ```

 ## Quick start

-After running the docker image or `poetry run whisper_asr` interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
+After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)

 There are 2 endpoints available:

-- /asr (JSON, SRT, VTT)
+- /asr (TXT, VTT, SRT, TSV, JSON)
 - /detect-language

 ## Automatic Speech recognition service /asr
@@ -82,7 +80,7 @@ If you choose the **transcribe** task, transcribes the uploaded file. Both audio
 Note that you can also upload video formats directly as long as they are supported by ffmpeg.

-You can get SRT and VTT output as a file from /asr endpoint.
+You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint.

 You can provide the language or it will be automatically recognized.
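
The README now advertises TXT, VTT, SRT, TSV and JSON output plus the `prompt` query parameter introduced on this branch. As a rough illustration of how a client would exercise those options, here is a hedged sketch using the `requests` library; the file field name `audio_file` and the query parameters mirror the /asr handler in app/webservice.py below, while `sample.wav` and the prompt text are placeholders, and the service is assumed to be running locally on port 9000 as in the README.

```python
# Hypothetical client call against a locally running service; illustrative only.
import requests

with open("sample.wav", "rb") as f:  # placeholder audio file
    response = requests.post(
        "http://localhost:9000/asr",
        params={
            "task": "transcribe",
            "language": "en",
            "prompt": "Names mentioned: Ahmet, Whisper ASR",  # new on this branch
            "output": "srt",  # txt, vtt, srt, tsv or json
        },
        files={"audio_file": f},
    )

print(response.headers.get("Content-Disposition"))  # e.g. attachment; filename="sample.srt"
print(response.text)
```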

app/webservice.py:
@@ -3,7 +3,7 @@ from fastapi.responses import StreamingResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.openapi.docs import get_swagger_ui_html
 import whisper
-from whisper.utils import write_srt, write_vtt
+from whisper.utils import ResultWriter, WriteTXT, WriteSRT, WriteVTT, WriteTSV, WriteJSON
 from whisper import tokenizer
 import os
 from os import path
@@ -65,25 +65,27 @@ def transcribe(
     task : Union[str, None] = Query(default="transcribe", enum=["transcribe", "translate"]),
     language: Union[str, None] = Query(default=None, enum=LANGUAGE_CODES),
-    output : Union[str, None] = Query(default="json", enum=["json", "vtt", "srt"]),
+    prompt: Union[str, None] = Query(default=None),
+    output : Union[str, None] = Query(default="txt", enum=["txt", "vtt", "srt", "tsv", "json"]),
 ):

-    result = run_asr(audio_file.file, task, language)
+    result = run_asr(audio_file.file, task, language, prompt)
     filename = audio_file.filename.split('.')[0]
+    myFile = StringIO()
     if(output == "srt"):
-        srt_file = StringIO()
-        write_srt(result["segments"], file = srt_file)
-        srt_file.seek(0)
-        return StreamingResponse(srt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.srt"'})
+        WriteSRT(ResultWriter).write_result(result, file = myFile)
     elif(output == "vtt"):
-        vtt_file = StringIO()
-        write_vtt(result["segments"], file = vtt_file)
-        vtt_file.seek(0)
-        return StreamingResponse(vtt_file, media_type="text/plain",
-                                 headers={'Content-Disposition': f'attachment; filename="{filename}.vtt"'})
+        WriteVTT(ResultWriter).write_result(result, file = myFile)
+    elif(output == "tsv"):
+        WriteTSV(ResultWriter).write_result(result, file = myFile)
+    elif(output == "json"):
+        WriteJSON(ResultWriter).write_result(result, file = myFile)
+    elif(output == "txt"):
+        WriteTXT(ResultWriter).write_result(result, file = myFile)
     else:
-        return result
+        return 'Please select an output method!'
+    myFile.seek(0)
+    return StreamingResponse(myFile, media_type="text/plain",
+                             headers={'Content-Disposition': f'attachment; filename="{filename}.{output}"'})


 @app.post("/detect-language", tags=["Endpoints"])
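
The rewritten /asr handler routes every output format through the whisper.utils writer classes into a single in-memory buffer instead of special-casing SRT and VTT. Below is a minimal standalone sketch of that pattern; the result dict is fabricated for illustration (a real one comes from Whisper's transcribe()), and it assumes the pinned openai-whisper revision, where write_result() accepts a file-like object exactly as used in the diff above.

```python
# Standalone sketch of the writer pattern used by the new handler (illustrative only).
from io import StringIO

from whisper.utils import ResultWriter, WriteSRT

# Fabricated stand-in for a Whisper transcription result.
fake_result = {
    "text": "Hello world.",
    "language": "en",
    "segments": [{"id": 0, "start": 0.0, "end": 1.5, "text": "Hello world."}],
}

buffer = StringIO()
# Mirrors the handler: the writer's constructor argument is only stored as an
# output directory and is not consulted by write_result() in that revision,
# so the class object passed here is effectively ignored.
WriteSRT(ResultWriter).write_result(fake_result, file=buffer)

buffer.seek(0)
print(buffer.read())  # SRT-formatted text, ready to stream back to the client
```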

poetry.lock (generated, 1451 changed lines): file diff suppressed because it is too large.

pyproject.toml:
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "whisper-asr-webservice"
-version = "1.0.5"
+version = "1.0.6"
 description = "Whisper ASR Webservice is a general-purpose speech recognition webservice."
 homepage = "https://github.com/ahmetoner/whisper-asr-webservice/"
 license = "https://github.com/ahmetoner/whisper-asr-webservice/blob/main/LICENCE"
@@ -12,17 +12,18 @@ readme = "README.md"
 packages = [{ include = "app" }]

 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.11"
 unidecode = "^1.3.4"
 uvicorn = { extras = ["standard"], version = "^0.18.2" }
 gunicorn = "^20.1.0"
-whisper = {git = "https://github.com/openai/whisper.git", rev="b9265e5796f5d80c18d1f9231ab234225676780b"}
+openai-whisper = {git = "https://github.com/openai/whisper.git", rev="7858aa9c08d98f75575035ecd6481f462d66ca27"}
 tqdm = "^4.64.1"
 transformers = "^4.22.1"
 python-multipart = "^0.0.5"
 ffmpeg-python = "^0.2.0"
 fastapi = "^0.85.0"
 fastapi-offline-swagger-ui = {git = "https://github.com/ahmetoner/fastapi-offline-swagger-ui"}
+torch = "^1.13.0"

 [tool.poetry.dev-dependencies]
 pytest = "^6.2.5"