import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# This script is run during the Docker build process to pre-download models.
GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
BASE_MODEL_PATH = "unsloth/gemma-2b-it"
# This correctly points to your fine-tuned model on the Hugging Face Hub.
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"

hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
if not hf_token:
    raise ValueError(
        "HUGGING_FACE_HUB_TOKEN environment variable is required to download models."
    )

print("--- Starting Model Pre-downloading ---")

# 1. Download Gender Model
print(f"Downloading: {GENDER_MODEL_PATH}")
AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
print("✅ Gender model downloaded.")

# 2. Download Grammar Base Model
print(f"Downloading base model: {BASE_MODEL_PATH}")
# We need to load the base model into memory to attach the adapter to it for caching.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    token=hf_token,
    dtype=torch.float32,
)
AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
print("✅ Base model downloaded.")

# 3. Download Your Fine-Tuned LoRA Adapter
print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
# This step downloads your private adapter and links it to the base model, caching it.
PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
print("✅ LoRA adapter downloaded.")

print("--- Model Pre-downloading Complete ---")
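
# --- Example (not executed): loading the cached artifacts at runtime ---
# A minimal sketch of how the pre-downloaded files might be reused at inference
# time, assuming the container runs with the same Hugging Face cache directory.
# The variable names below are illustrative and not part of this script.
#
#   base = AutoModelForCausalLM.from_pretrained(
#       BASE_MODEL_PATH, token=hf_token, dtype=torch.float32
#   )
#   grammar_model = PeftModel.from_pretrained(base, LORA_ADAPTER_PATH, token=hf_token)
#   grammar_model.eval()
#
# Because both the base model and the adapter are already in the local cache,
# these calls resolve without re-downloading during container startup.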