aboutsummaryrefslogtreecommitdiffhomepage
path: root/Dockerfile
blob: 7b9f982f8330775e4dfea95b4e6d2ac1c438b371 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# First stage: stage the git-tracked contents of the repository under /output so
# that the final image can copy them from there.
# Note: the whole build context is copied into this stage, so large files in the
# repository folder should be listed in .dockerignore.
FROM ubuntu:jammy-20220428 AS intermediate

# Make a failing `git ls-files` fail the piped RUN below; the default /bin/sh
# only reports the exit status of the last command in a pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Refresh the package index and install git in the same layer so that a cached
# `apt-get update` layer is never combined with a newer install step.
RUN apt-get update && \
  apt-get install git -y && \
  rm -rf /var/lib/apt/lists/*

WORKDIR /tmp
COPY . /tmp

# Copy only the files tracked by git, preserving their relative paths.
# NUL-delimited output (-z / -0) keeps names containing spaces, quotes or
# non-ASCII characters intact instead of letting xargs split or unquote them.
RUN mkdir /output && \
  git ls-files -z | xargs -0 cp -r --parents -t /output
COPY .git /output/.git

FROM ubuntu:jammy-20220428

# Name and numeric IDs of the unprivileged account created later in this stage.
ENV USER="user" \
    NB_UID=1000 \
    NB_GID=1000
# HOME is set in its own instruction: substitution inside a single ENV uses the
# values from before that instruction, so ${USER} must already be defined.
ENV HOME="/home/${USER}"

# Install system dependencies.
# Everything happens in one layer so that the package index fetched by
# `apt-get update` is always the one used by the installs (a separately cached
# update layer can go stale), and so the index can be removed again at the end.
# DEBIAN_FRONTEND is exported only inside this RUN: it keeps tzdata (and any
# other package) from prompting during the build without leaking into the
# runtime environment.
#
# Besides the base tooling (python3.10, python3-venv, git, curl) this installs
# the dependencies for PyPDF2 and pdftotext: tzdata, the compiler toolchain with
# the poppler/qpdf headers, a JDK and tesseract with eng/deu/fra language data.
RUN export DEBIAN_FRONTEND=noninteractive && \
  apt-get update && \
  apt-get upgrade -y && \
  apt-get install -y \
    build-essential \
    curl \
    default-jdk \
    git \
    libpoppler-cpp-dev \
    libqpdf-dev \
    pkg-config \
    python3-dev \
    python3-venv \
    python3.10 \
    tesseract-ocr \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    tesseract-ocr-fra \
    tzdata \
  && rm -rf /var/lib/apt/lists/*


# Create the unprivileged account: a group with GID ${NB_GID} (--non-unique
# allows reusing an existing GID), a password-less user with UID ${NB_UID} in
# that group, and make sure the home directory belongs to that UID/GID.
RUN groupadd --non-unique --gid ${NB_GID} ${USER} && \
  adduser --disabled-password --gecos "Default user" --uid ${NB_UID} --gid ${NB_GID} ${USER} && \
  chown -R ${NB_UID}:${NB_GID} ${HOME}

# Every following instruction, and the container itself, runs as this user.
USER ${USER}

# Bring in the repository files staged under /output by the `intermediate` stage.
# The target directory is created with an explicit RUN before WORKDIR selects it
# (presumably so that it is owned by the current user rather than root — confirm).
RUN mkdir ${HOME}/sec-certs
WORKDIR ${HOME}/sec-certs
# The destination is relative to the WORKDIR set just above.
COPY --from=intermediate --chown=${NB_UID}:${NB_GID} /output ./

# Create a virtual environment in the user's home directory, put its bin/
# directory first on PATH so its python/pip are used by default, and tell uv to
# use it as the project environment.
ENV VENV_PATH="${HOME}/venv"
RUN python3 -m venv "${VENV_PATH}"
ENV PATH="${VENV_PATH}/bin:${PATH}" \
    UV_PROJECT_ENVIRONMENT="${VENV_PATH}"

# Install the Python dependencies in one layer:
#   1. upgrade pip and wheel, and install uv
#   2. `uv sync` the project
#   3. add notebook and jupyterlab (needed for mybinder.org)
#   4. download the spaCy model en_core_web_sm
RUN pip install --upgrade pip wheel uv \
  && uv sync \
  && pip install --no-cache notebook jupyterlab \
  && uv run spacy download en_core_web_sm

# Just to be sure that pdftotext is in $PATH.
# NOTE(review): PATH entries are expected to be directories, but this prepends
# the path of the pdftotext executable itself. The value is left unchanged so
# that command lookup order (virtual environment first) is not affected —
# confirm whether this entry is needed at all.
ENV PATH="/usr/bin/pdftotext:${PATH}"

# Run the application: `sec-certs` is the container's entry point (exec form),
# so arguments given to `docker run` are passed straight to it.
ENTRYPOINT ["sec-certs"]