Knack-Scraper/transform/Dockerfile
2025-12-23 17:53:37 +01:00

46 lines
1.2 KiB
Docker

FROM python:3.12-slim

# Create the /app, /data and /models directories up front.
RUN mkdir -p /app /data /models

# Build dependencies: C, C++ and Fortran compilers, the OpenBLAS/LAPACK
# development packages and pkg-config, plus the curl and jq command-line tools.
# The apt package index is deleted in the same layer so it never lands in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        gfortran \
        jq \
        liblapack-dev \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# GLiNER model identifier and the directory under /models associated with it.
# Both variables persist into the running container's environment.
ENV GLINER_MODEL_ID=urchade/gliner_multi-v2.1 \
    GLINER_MODEL_PATH=/models/gliner_multi-v2.1

WORKDIR /app

# Only the dependency manifest is copied before installing, so this layer is
# rebuilt when requirements.txt changes rather than on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Copy the build context's .env file into the working directory (/app).
# NOTE(review): this stores the file's contents in an image layer, where anyone
# who can pull the image can read them. If .env holds credentials, supply it at
# run time instead (e.g. `docker run --env-file` or a bind mount) — confirm how
# the application and the cron job load it before changing this.
COPY .env .
# Install cron (used to schedule main.py) and locales.
# `update` and `install` share one layer so a cached, stale package index can
# never be paired with a newer install step, and the index is removed in that
# same layer so it is not shipped in the image. apt-get replaces apt because
# apt's command-line interface is not guaranteed stable for scripted use.
# --no-install-recommends matches the earlier install step and skips optional
# extras (such as a mail transport agent for cron).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cron \
        locales \
    && rm -rf /var/lib/apt/lists/*
# Place the GLiNER model helper and the container entrypoint in /usr/local/bin
# and mark both as executable.
COPY ensure_gliner_model.sh entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/entrypoint.sh /usr/local/bin/ensure_gliner_model.sh

# Copy the top-level Python sources of the build context into the working directory.
COPY *.py ./
# Schedule main.py through cron.
#   Active (testing) schedule: every 30 minutes  ->  */30 * * * *
#   Intended schedule: Sundays at 03:00          ->  0 3 * * 0
# Job output is appended to /proc/1/fd/1 (stdout of PID 1); stderr follows it.
# The schedule file is written, set to mode 0644 and then loaded with `crontab`
# as the crontab of the build user.
# NOTE(review): files under /etc/cron.d normally carry a user field after the
# schedule; this line has none, so cron may reject the /etc/cron.d copy and only
# the `crontab`-installed copy would run — confirm.
RUN printf '%s\n' '*/30 * * * * cd /app && /usr/local/bin/python main.py >> /proc/1/fd/1 2>&1' > /etc/cron.d/knack-transform \
    && chmod 0644 /etc/cron.d/knack-transform \
    && crontab /etc/cron.d/knack-transform
# Keep /models in a volume so models persist between container runs.
VOLUME ["/models"]

# Default start command (exec form): the entrypoint script copied to /usr/local/bin.
CMD ["/usr/local/bin/entrypoint.sh"]
# Disabled alternative: start main.py directly instead of the entrypoint script.
#CMD ["python", "main.py"]