Knack-Scraper/transform/Dockerfile
2025-12-24 17:58:23 +01:00

51 lines
1.5 KiB
Docker

# Base image: slim variant of the official Python 3.12 image.
FROM python:3.12-slim

# Pre-create the application, data and model directories.
RUN mkdir -p /app /data /models

# Build toolchain (C/C++/Fortran compilers, OpenBLAS/LAPACK headers, pkg-config)
# plus curl and jq. The apt package index is removed in the same layer so it
# does not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        gfortran \
        jq \
        liblapack-dev \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Model identifiers and their on-disk locations under /models.
# Presumably read by the ensure_*_model.sh helpers and main.py; confirm against those scripts.
ENV GLINER_MODEL_ID=urchade/gliner_multi-v2.1 \
    GLINER_MODEL_PATH=/models/gliner_multi-v2.1 \
    MINILM_MODEL_ID=sentence-transformers/all-MiniLM-L6-v2 \
    MINILM_MODEL_PATH=/models/all-MiniLM-L6-v2
# Relative paths in the instructions below resolve against /app.
WORKDIR /app

# Copy only the dependency manifest first, so the pip layer is reused from the
# build cache as long as requirements.txt is unchanged.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# NOTE(review): this bakes the local .env file into an image layer. If it holds
# credentials, prefer supplying them at runtime (environment variables or
# secrets); confirm what the application expects before removing this line.
COPY .env ./
# Install cron and locales.
# Update and install run in the same layer, so a cached, stale package index
# can never be paired with a fresh install step. apt-get is used instead of
# apt, whose CLI is meant for interactive use. Recommended extras are skipped
# and the package index is removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cron \
        locales \
    && rm -rf /var/lib/apt/lists/*
# Install the GLiNER and MiniLM model helper scripts and the container
# entrypoint into /usr/local/bin, then mark all three as executable.
COPY ensure_gliner_model.sh ensure_minilm_model.sh entrypoint.sh /usr/local/bin/
RUN chmod +x \
        /usr/local/bin/ensure_gliner_model.sh \
        /usr/local/bin/ensure_minilm_model.sh \
        /usr/local/bin/entrypoint.sh

# Application sources: every top-level Python file goes into the working directory.
COPY *.py ./
# Register the cron job that runs main.py from /app and appends its output to
# PID 1's stdout (/proc/1/fd/1).
# Active schedule (testing): every 30 minutes   -> */30 * * * *
# Intended schedule:         Sundays at 03:00   -> 0 3 * * 0
# NOTE(review): the entry has no user field, which cron expects for files under
# /etc/cron.d; it takes effect through the `crontab` call below, which installs
# it as the build user's crontab. Confirm this is the intended mechanism.
RUN printf '%s\n' '*/30 * * * * cd /app && /usr/local/bin/python main.py >> /proc/1/fd/1 2>&1' \
        > /etc/cron.d/knack-transform \
    && chmod 0644 /etc/cron.d/knack-transform \
    && crontab /etc/cron.d/knack-transform
# Declare /models as a volume so model files persist between container runs.
VOLUME ["/models"]

# Default command: the entrypoint wrapper script.
CMD ["/usr/local/bin/entrypoint.sh"]
# Disabled alternative that invokes main.py directly:
#CMD ["python", "main.py"]