Merge branch 'main' into main

This commit is contained in:
Ambika Sukla
2024-06-12 09:56:53 -04:00
committed by GitHub

View File

@@ -4,26 +4,26 @@ RUN apt-get update && apt-get -y --no-install-recommends install libgomp1
# Application root inside the image; used below as the WORKDIR.
ENV APP_HOME=/app
# install Java
# Creates /usr/share/man/man1 first — presumably the usual workaround for slim
# Debian images, where the JRE package's post-install step fails when that
# directory is missing (TODO confirm against the base image).
# update + install stay in one layer so the package lists are never stale.
RUN mkdir -p /usr/share/man/man1 && \
    apt-get update -y && \
    apt-get install -y openjdk-17-jre-headless
# install essential packages
# XML/XSLT and libmagic development headers plus a compiler toolchain.
# apt-get update runs in the same layer so this step does not depend on
# package lists cached by an earlier (possibly stale) layer.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        libmagic-dev \
        libxml2-dev \
        libxslt-dev
# install tesseract
# Steps, all in one layer:
#   1. install the distro tesseract-ocr plus lsb-release (lsb_release -cs is
#      used to build the repository line below);
#   2. register the notesalexp.org tesseract-ocr5 repository and install its
#      keyring package;
#   3. refresh the package lists and install tesseract-ocr / libtesseract-dev
#      again so apt can pick them up from the newly added repository;
#   4. download the English traineddata into the Tesseract 5 tessdata directory.
# NOTE(review): the keyring is installed with signature checks disabled
# (AllowInsecureRepositories / --allow-unauthenticated), and eng.traineddata is
# fetched from the moving `main` branch without a checksum — consider pinning
# a commit and verifying a sha256.
RUN apt-get update \
    && apt-get install -y \
        lsb-release \
        tesseract-ocr \
    && echo "deb https://notesalexp.org/tesseract-ocr5/$(lsb_release -cs)/ $(lsb_release -cs) main" | tee /etc/apt/sources.list.d/notesalexp.list > /dev/null \
    && apt-get update -oAcquire::AllowInsecureRepositories=true \
    && apt-get install notesalexp-keyring -oAcquire::AllowInsecureRepositories=true -y --allow-unauthenticated \
    && apt-get update \
    && apt-get install -y \
        libtesseract-dev \
        tesseract-ocr \
    && wget -P /usr/share/tesseract-ocr/5/tessdata/ https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata
# Install git and unzip, then remove packages that are no longer required.
# apt-get update runs in the same layer so the install does not rely on
# package lists left behind by an earlier layer.
RUN apt-get update && \
    apt-get install -y \
        git \
        unzip && \
    apt-get autoremove -y
# Switch to the application root and copy the whole build context into it.
WORKDIR ${APP_HOME}
COPY . ./
# Upgrade the packaging tools; --no-cache-dir keeps pip's download cache out
# of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools
@@ -34,4 +34,5 @@ RUN python -m nltk.downloader stopwords
# Download the NLTK "punkt" resource at build time.
RUN python -m nltk.downloader punkt
# Load the cl100k_base encoding once during the build — presumably so that
# tiktoken's data is already cached in the image rather than fetched when the
# container starts (TODO confirm).
RUN python -c "import tiktoken; tiktoken.get_encoding(\"cl100k_base\")"
# Make the script launched by CMD executable.
RUN chmod +x run.sh
# Documents the port the service is expected to listen on; EXPOSE does not
# publish it.
EXPOSE 5001
# Exec form: run.sh is started directly instead of under `/bin/sh -c`, so it
# receives the signals sent by `docker stop`.
# Requires run.sh to begin with a shebang line and to be executable.
CMD ["./run.sh"]