Skip to content

Commit

Permalink
Merge pull request #1 from bfourie/dockerfile-ocr
Browse files Browse the repository at this point in the history
DockerFile for docd with OCR
  • Loading branch information
bfourie authored Sep 5, 2022
2 parents aa395e1 + 7572739 commit f810eee
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# ---- Build stage: compiles the docd binary with the `ocr` build tag ----
FROM docker.io/golang:1.14-buster AS build

# Install tesseract plus its development package from the notesalexp.org
# third-party repository (buster suite).
#
# SECURITY NOTE: the repository's signing key ships inside the
# `notesalexp-keyring` package, so the first `apt-get update` and the keyring
# install must run with signature checks disabled
# (AllowInsecureRepositories / --allow-unauthenticated). The final
# `apt-get update` runs with normal verification once the keyring is in place.
#
# `apt-get` is used throughout instead of `apt`: the `apt` front end does not
# guarantee a stable command-line interface for scripted use.
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    apt-get --assume-yes install apt-transport-https && \
    apt-add-repository 'deb https://notesalexp.org/tesseract-ocr-dev/buster/ buster main' && \
    apt-get update -oAcquire::AllowInsecureRepositories=true && \
    apt-get --assume-yes --allow-unauthenticated install notesalexp-keyring -oAcquire::AllowInsecureRepositories=true && \
    apt-get update && \
    apt-get --assume-yes install tesseract-ocr libtesseract-dev tesseract-ocr-tha

# Copy the whole build context into the GOPATH location of the docconv package.
WORKDIR /go/src/code.sajari.com/docconv/
COPY ./ ./

# Build from the docd command directory.
WORKDIR /go/src/code.sajari.com/docconv/docd/

# Fetch the gosseract binding (and its test dependencies, via -t) before
# building. NOTE(review): this deliberately stays after COPY; whether Go
# resolves it in module or GOPATH mode depends on the copied sources — confirm
# before reordering for cache efficiency.
RUN go get -t github.com/otiai10/gosseract/...

# Produce ./docd with the `ocr` build tag enabled.
RUN go build -tags ocr .
# ---- Runtime stage: slim Debian image that ships the docd binary ----
FROM docker.io/debian:11-slim AS docd

# Command-line document tools installed alongside docd (presumably the external
# converters docd invokes at runtime — confirm against the docconv sources).
# The apt cache and package lists are removed in the same layer to keep the
# image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        lynx \
        poppler-utils \
        tidy \
        unrtf \
        wv \
        zip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install tesseract from the notesalexp.org third-party repository (bullseye
# suite), mirroring the build stage.
#
# SECURITY NOTE: the repository's signing key ships inside the
# `notesalexp-keyring` package, so the first `apt-get update` and the keyring
# install must run with signature checks disabled
# (AllowInsecureRepositories / --allow-unauthenticated). The final
# `apt-get update` runs with normal verification once the keyring is in place.
#
# `apt-get` is used instead of `apt` (no stable CLI for scripts), and the apt
# cache and package lists are removed in this same layer so they are not baked
# into the final image.
#
# NOTE(review): libtesseract-dev and software-properties-common are build-time
# style packages; they are kept here to preserve the original package set, but
# could likely be replaced by the runtime shared-library package — verify.
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    apt-get --assume-yes install apt-transport-https && \
    apt-add-repository 'deb https://notesalexp.org/tesseract-ocr-dev/bullseye/ bullseye main' && \
    apt-get update -oAcquire::AllowInsecureRepositories=true && \
    apt-get --assume-yes --allow-unauthenticated install notesalexp-keyring -oAcquire::AllowInsecureRepositories=true && \
    apt-get update && \
    apt-get --assume-yes install tesseract-ocr libtesseract-dev tesseract-ocr-tha && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Documents the port for operators and tooling; EXPOSE does not publish it.
EXPOSE 8888

# Bring in only the compiled binary from the build stage.
COPY --from=build /go/src/code.sajari.com/docconv/docd/docd /docd

# NOTE(review): no USER is set, so the container runs as root. Consider adding
# a dedicated unprivileged user once it is confirmed docd needs no root access.

# Exec-form entrypoint; the default argument prints usage and can be overridden
# at `docker run`.
ENTRYPOINT ["/docd"]
CMD ["--help"]

0 comments on commit f810eee

Please sign in to comment.