From 0e80410ea10db5ef5a4e34b7097e566ca4454ae4 Mon Sep 17 00:00:00 2001
From: Alan Casagrande
Date: Mon, 18 Feb 2019 11:32:06 -0300
Subject: [PATCH] docker image with ocr enabled

---
Review note: file contents below were revised after export (quoted shell
expansions, GOPATH guard, single apt layer). The blob "index" lines were
dropped because their hashes no longer match the revised contents.

 docd/debian_ocr.sh         | 28 ++++++++++++++++++++++++++++
 docd/debian_ocr/Dockerfile | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100755 docd/debian_ocr.sh
 create mode 100644 docd/debian_ocr/Dockerfile

diff --git a/docd/debian_ocr.sh b/docd/debian_ocr.sh
new file mode 100755
--- /dev/null
+++ b/docd/debian_ocr.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env sh
+
+# Debian alternative build with OCR enabled.
+
+# Build context must be the GOPATH where docconv and gosseract are contained.
+
+# The build runs inside the Docker image, which is more reliable when working
+# from an OS other than Linux.
+
+# Stop early with a clear message when GOPATH is unset or empty; otherwise
+# docker build would get an empty build context and a Dockerfile path under /.
+: "${GOPATH:?GOPATH must be set to the workspace containing docconv}"
+
+export NAME=docd
+export VERSION=debian
+# Quoted so that a GOPATH containing spaces stays a single word.
+export DOCKERFILE="$GOPATH/src/code.sajari.com/docconv/docd/debian_ocr/Dockerfile"
+
+echo "Building ${NAME} for ${VERSION} with OCR enabled..."
+
+echo "GOPATH: ${GOPATH}"
+
+echo "Dockerfile: ${DOCKERFILE}"
+
+docker build \
+  -t "${NAME}:ocr" \
+  -f "$DOCKERFILE" \
+  "$GOPATH"
diff --git a/docd/debian_ocr/Dockerfile b/docd/debian_ocr/Dockerfile
new file mode 100644
--- /dev/null
+++ b/docd/debian_ocr/Dockerfile
@@ -0,0 +1,36 @@
+# NOTE(review): the base image is untagged, so every build follows
+# debian:latest. Pin a release tag once the tested release is confirmed.
+FROM debian
+
+# Update the package index and install in a single layer, so a cached
+# "apt-get update" layer is never reused with a stale package list, and
+# drop the index in the same layer to keep it out of the image.
+# Recommended packages are kept on purpose: the ocr build goes through cgo
+# and presumably needs the compiler toolchain that golang recommends, so
+# verify before adding --no-install-recommends.
+RUN apt-get update && apt-get install -y \
+        git \
+        golang \
+        libleptonica-dev \
+        libtesseract-dev \
+        lynx \
+        poppler-utils \
+        tesseract-ocr-eng \
+        tidy \
+        unrtf \
+        wv \
+        zip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Build context must be the host GOPATH
+COPY . /goworkspace
+
+WORKDIR /goworkspace/src/code.sajari.com/docconv/docd
+
+ENV GOPATH=/goworkspace
+
+RUN GOOS=linux GOARCH=amd64 go build -tags ocr -o /docd
+
+EXPOSE 8888
+
+CMD ["/docd"]