diff --git a/docd/debian_ocr.sh b/docd/debian_ocr.sh
new file mode 100755
index 0000000..9ca8d1d
--- /dev/null
+++ b/docd/debian_ocr.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env sh
+
+# Debian alternative build with OCR enabled.
+#
+# Builds the docd image from docd/debian_ocr/Dockerfile and tags it docd:ocr.
+#
+# The build context must be the GOPATH that contains both docconv and
+# gosseract. The build itself runs inside the Docker image, which is more
+# reliable than building on a host OS other than Linux.
+#
+# Required environment:
+#   GOPATH - Go workspace root, passed to docker as the build context.
+
+# Stop at the first failing command or unset variable.
+set -eu
+
+# Fail early with a clear message instead of handing docker an empty context.
+: "${GOPATH:?GOPATH must be set to the workspace containing docconv and gosseract}"
+
+export NAME=docd
+export VERSION=debian
+export DOCKERFILE="${GOPATH}/src/code.sajari.com/docconv/docd/debian_ocr/Dockerfile"
+
+echo "Building ${NAME} for ${VERSION} with OCR enabled..."
+echo "GOPATH: ${GOPATH}"
+echo "Dockerfile: ${DOCKERFILE}"
+
+# Expansions are quoted so a GOPATH containing spaces is passed intact.
+docker build \
+  -t "${NAME}:ocr" \
+  -f "${DOCKERFILE}" \
+  "${GOPATH}"
diff --git a/docd/debian_ocr/Dockerfile b/docd/debian_ocr/Dockerfile
new file mode 100644
index 0000000..c177c53
--- /dev/null
+++ b/docd/debian_ocr/Dockerfile
@@ -0,0 +1,31 @@
+FROM debian
+
+# Install all packages in one layer. A separate apt-get update layer can be
+# served from the build cache and leave later installs with a stale index.
+RUN apt-get update && apt-get install -y \
+        zip \
+        poppler-utils \
+        wv \
+        unrtf \
+        tidy \
+        lynx \
+        libtesseract-dev \
+        libleptonica-dev \
+        tesseract-ocr-eng \
+        git \
+        golang \
+    && rm -rf /var/lib/apt/lists/*
+
+# Build context must be the host GOPATH
+COPY . /goworkspace
+
+WORKDIR /goworkspace/src/code.sajari.com/docconv/docd
+
+ENV GOPATH=/goworkspace
+
+# Compile docd with the ocr build tag and place the binary at /docd.
+RUN GOOS=linux GOARCH=amd64 go build -tags ocr -o /docd
+
+EXPOSE 8888
+
+CMD ["/docd"]