Knack-Scraper/transform/ensure_gte_model.sh
2026-01-27 20:19:05 +01:00

35 lines
1 KiB
Bash

#!/usr/bin/env bash
#
# Ensure the GTE model is available on local disk.
#
# Required environment variables:
#   GTE_MODEL_PATH  Directory that holds (or will hold) the model files.
#   GTE_MODEL_ID    Hugging Face repository id to download; only needed when
#                   the model is not already present.
#
# The model counts as present when GTE_MODEL_PATH is a directory containing
# config.json. Only that one file is checked, so a partially downloaded
# directory that already has config.json is also treated as present.
#
# Exit status: 0 if the model is present or was downloaded, non-zero otherwise.
set -euo pipefail

# Fail with a readable message instead of the bare "unbound variable" error
# that `set -u` would otherwise produce.
: "${GTE_MODEL_PATH:?GTE_MODEL_PATH environment variable must be set}"

if [[ -d "$GTE_MODEL_PATH" && -f "$GTE_MODEL_PATH/config.json" ]]; then
  echo "GTE model already present at $GTE_MODEL_PATH"
  exit 0
fi

# The model id is only required on the download path, so it is validated here
# rather than before the fast path above.
: "${GTE_MODEL_ID:?GTE_MODEL_ID environment variable must be set}"

# The Python snippet below reads both values through os.environ; export them so
# this also works when the caller set them as plain (non-exported) variables.
export GTE_MODEL_ID GTE_MODEL_PATH

echo "Downloading GTE model $GTE_MODEL_ID to $GTE_MODEL_PATH"
mkdir -p -- "$GTE_MODEL_PATH"

# Use Python with huggingface_hub for reliable model downloading.
# The heredoc delimiter is quoted so the shell performs no expansion inside it.
python3 <<'EOF'
import os
import sys

from huggingface_hub import snapshot_download

model_id = os.environ.get('GTE_MODEL_ID')
model_path = os.environ.get('GTE_MODEL_PATH')

if not model_id or not model_path:
    raise ValueError("GTE_MODEL_ID and GTE_MODEL_PATH environment variables must be set")

try:
    print(f"Downloading model {model_id} to {model_path}")
    snapshot_download(
        repo_id=model_id,
        # No explicit cache directory is passed; the files are requested
        # directly into local_dir below.
        cache_dir=None,
        local_dir=model_path,
        # Ask for real files instead of symlinks.
        # NOTE(review): newer huggingface_hub releases appear to deprecate this
        # argument - confirm against the pinned version.
        local_dir_use_symlinks=False,
    )
    print(f"Successfully downloaded GTE model to {model_path}")
except Exception as e:
    # Report on stderr and exit through sys.exit: the bare exit() helper is
    # injected by the site module and is not guaranteed to exist.
    print(f"Error downloading GTE model: {e}", file=sys.stderr)
    sys.exit(1)
EOF