PreethiCarmelBosco committed on
Commit edd4ff1 · verified · 1 Parent(s): 28eb58c
Files changed (1)
  1. Dockerfile +32 -20
Dockerfile CHANGED
@@ -1,31 +1,43 @@
-# --- 1. Use the official Hugging Face TGI image ---
-# This is a pre-built image with everything included.
-FROM ghcr.io/huggingface/text-generation-inference:latest
+# --- 1. Use a standard, reliable Python base ---
+FROM python:3.12-slim
 
-# --- 2. Download the GGUF model using cURL ---
-# We use cURL (which is already in the image) to avoid
-# installing Python and causing version conflicts.
 WORKDIR /app
 
-# Get the HF_TOKEN from the build secrets
-ARG HF_TOKEN
+# --- 2. Install C/C++ build tools & venv ---
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    python3-venv \
+    && rm -rf /var/lib/apt/lists/*
 
-# Run the download command
+# --- 3. Download the model first ---
+# This uses a safe, isolated venv just for downloading
+COPY download_model.py .
 RUN --mount=type=secret,id=HF_TOKEN \
-    curl -L \
-    -H "Authorization: Bearer $(cat /run/secrets/HF_TOKEN)" \
-    "https://huggingface.co/mradermacher/prem-1B-SQL-GGUF/resolve/main/prem-1B-SQL.Q8_0.gguf" \
-    -o "prem-1B-SQL.Q8_0.gguf"
 
-# --- 3. Set the container's command to run TGI ---
-# This is the command that will run when the container starts.
-ENV MODEL_ID="/app/prem-1B-SQL.Q8_0.gguf"
+    sh -c 'python3 -m venv /tmp/downloader-venv && \
+    . /tmp/downloader-venv/bin/activate && \
+    pip install huggingface_hub && \
+    python3 download_model.py'
+
+# --- 4. Build llama-cpp-python (CPU-ONLY) ---
+# This is the CRITICAL FIX.
+# This forces a fast, CPU-only build that will not time out.
+ENV CMAKE_ARGS="-DLLAMA_CUDA=OFF"
 
+# This build step will now be fast (1-2 minutes)
+RUN pip install "llama-cpp-python[server]"
+
+# --- 5. Set the runtime command ---
+# Expose the port (matches README.md)
+EXPOSE 8000
+
+# This command runs the server
 CMD [ \
-    "text-generation-launcher", \
-    "--model-id", "${MODEL_ID}", \
-    "--quantize", "gguf", \
+    "python", \
+    "-m", "llama_cpp.server", \
+    "--model", "prem-1B-SQL.Q8_0.gguf", \
+    "--n_gpu_layers", "0", \
     "--port", "8000", \
     "--host", "0.0.0.0", \
-    "--openai-api-key-env-var", "API_KEY" \
+    "--api_key_env_var", "API_KEY" \
 ]
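
The new build copies and runs download_model.py, but that script is not part of this diff. A minimal sketch of what it might look like, assuming it uses huggingface_hub's hf_hub_download to fetch the same GGUF file the old Dockerfile pulled via cURL, reading the token from the BuildKit secret mount used in the RUN step:

# Hypothetical reconstruction of download_model.py (not included in this commit).
# Assumes the BuildKit secret is mounted at /run/secrets/HF_TOKEN, as in the RUN step.
import os

from huggingface_hub import hf_hub_download

# Read the token from the secret mount if present; public repos work without one.
token = None
secret_path = "/run/secrets/HF_TOKEN"
if os.path.exists(secret_path):
    with open(secret_path) as f:
        token = f.read().strip()

# Download the quantized model into the working directory (/app),
# where the CMD expects to find prem-1B-SQL.Q8_0.gguf.
hf_hub_download(
    repo_id="mradermacher/prem-1B-SQL-GGUF",
    filename="prem-1B-SQL.Q8_0.gguf",
    local_dir=".",
    token=token,
)

Running the download in its own layer before the llama-cpp-python build also means the slow model fetch is cached by Docker independently of the compile step.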
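Once the container is up, llama_cpp.server exposes an OpenAI-compatible HTTP API on port 8000. A hedged smoke test using only the Python standard library; the endpoint path is standard for llama_cpp.server, while the Bearer-token header assumes the server actually enforces the key named in the CMD's API_KEY option:

# Hypothetical smoke test against the running container, assuming the
# OpenAI-compatible /v1/completions endpoint on localhost:8000.
import json
import os
import urllib.request

body = {
    "prompt": "-- List the names of all users\nSELECT",
    "max_tokens": 32,
    "temperature": 0.0,
}
req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(body).encode("utf-8"),
    headers={
        "Content-Type": "application/json",
        # If the server enforces an API key, send it as a Bearer token.
        "Authorization": f"Bearer {os.environ.get('API_KEY', '')}",
    },
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    result = json.load(resp)

print(result["choices"][0]["text"])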