Spaces:

CletusOriakhi
/

afiya-ai-backend

Sleeping

App Files Files Community

CletusOriakhi committed on Oct 16

Commit

5ab07ae

verified ·

1 Parent(s): 5b050ed

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -48

app.py CHANGED Viewed

@@ -1,71 +1,81 @@
-# app.py - FINAL "Smart Proxy" Backend
 import gradio as gr
-from gradio_client import Client, handle_file
-import os
-# --- 1. Define the NEW, MORE RELIABLE Public Model ---
-# We are switching to the official Zephyr 7B model from Hugging Face.
-PUBLIC_MODEL_API = "https://huggingface-projects-zephyr-7b-beta.hf.space/run/chat"
-# --- 2. The "Smart" Function ---
-# This is where we add your custom context.
-def get_smart_response(message, history):
-    # 1. Build the fixed system prompt that is sent with every request.
-    # It carries the persona, the Nigerian context and the "not a doctor" disclaimer instruction.
-    system_prompt = (
-        "You are Healthify AI, a helpful and compassionate medical assistant "
-        "providing information for users in Nigeria. Your answers should be "
-        "safe, clear, and relevant to the Nigerian context. Always include a "
-        "disclaimer that you are an AI and not a substitute for a real doctor."
     )
-    # 2. Copy each (user, bot) pair of the history into a list of two-item lists.
-    history_formatted = []
-    for user_msg, bot_msg in history:
-        history_formatted.append([user_msg, bot_msg])
     try:
-        # 3. Create a new client for PUBLIC_MODEL_API (done again on every call).
-        client = Client(PUBLIC_MODEL_API)
-        # 4. Call the remote "/chat" endpoint with the message, history and system prompt.
-        # NOTE(review): assumes the endpoint accepts these keyword names - confirm against its API page.
-        result = client.predict(
-            message=message,
-            history=history_formatted,
-            system_prompt=system_prompt, # Here is our custom instruction!
-            api_name="/chat"
         )
-        # 5. Take the bot half of the last pair; assumes result is the full history as [user, bot] pairs - TODO confirm.
-        bot_response = result[-1][1]
     except Exception as e:
-        print(f"Error calling public API: {e}")
-        bot_response = "Sorry, I'm having trouble connecting to the AI model right now. Please try again in a moment."
-    # 6. Yield the reply one character at a time; the full reply is already available at this point.
-    for char in bot_response:
-        yield char
 # --- 3. The Gradio Interface ---
 # This function handles the chat logic.
 def handle_chat(message, history):
-    history.append((message, ""))  # placeholder turn; it is part of the history passed to get_smart_response below
-    # Rebuild the reply character by character from the generator, yielding the updated history each time
-    full_response = ""
-    for char in get_smart_response(message, history):
-        full_response += char
-        history[-1] = (message, full_response)  # overwrite the placeholder with the text received so far
-        yield history
 # --- 4. Build the Gradio App ---
 # This is what your frontend will connect to.
 with gr.Blocks(theme=gr.themes.Base()) as demo:
-    gr.Markdown("# Healthify AI 🇳🇬 - Backend")
-    gr.Markdown("This backend is a proxy that adds Nigerian context to a public model.")
     chatbot = gr.Chatbot(label="Healthify AI Chat")
@@ -77,7 +87,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
         )
         submit_button = gr.Button("Send", variant="primary", scale=1)
-    submit_button.click(handle_chat, [prompt_input, chatbot], chatbot)
-    prompt_input.submit(handle_chat, [prompt_input, chatbot], chatbot)
 demo.queue().launch()

+# app.py - FINAL Self-Contained Backend
 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+# --- 1. Load the Model and Tokenizer ---
+# We are loading a small, fast model directly into our app.
+# This will run on a free CPU Space.
+print("Loading model... This may take a moment.")
+tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")  # same checkpoint name as the model below
+model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
+# Reuse the end-of-sequence token for padding when the tokenizer defines no pad token
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+print("Model loaded successfully!")
+# --- 2. The Inference Function ---
+# This function runs the model on the CPU.
+def get_response(message, history):
+    """Generate one answer to ``message`` with the locally loaded model.
+
+    Args:
+        message: The user's question; it is inserted into a fixed prompt.
+        history: Earlier chat turns. Accepted so the signature matches the
+            chat handler, but it is not used when building the prompt.
+
+    Returns:
+        The generated answer text. A fixed fallback sentence is returned when
+        the model produces no text, and a fixed apology when generation raises.
+    """
+    # 1. Create a simple prompt.
+    prompt = (
+        "You are Healthify AI, a medical assistant. "
+        "Provide a clear and concise answer to the user's question.\n\n"
+        f"Question: {message}\n\nAnswer:"
     )
     try:
+        # 2. Tokenize the prompt and keep the attention mask: pad_token_id
+        # below is the EOS id, so generate() cannot infer the mask from padding.
+        encoded = tokenizer(prompt, return_tensors="pt")
+        input_ids = encoded.input_ids
+        # 3. Generate the response
+        # Generation on a CPU is slow, so we limit the response length.
+        outputs = model.generate(
+            input_ids,
+            attention_mask=encoded.attention_mask,
+            max_new_tokens=100,  # Keep this low for faster CPU response
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            top_p=0.9,
+            temperature=0.7
         )
+        # 4. Decode *only* the newly generated tokens.
+        # The output sequence starts with the prompt tokens, so slicing them
+        # off is more reliable than searching the decoded text for "Answer:",
+        # which matches too early when the user's message contains that marker.
+        new_tokens = outputs[0][input_ids.shape[-1]:]
+        bot_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+        # 5. Never hand an empty reply back to the chat window.
+        if not bot_response:
+            bot_response = "I'm not sure how to respond to that." # Fallback
     except Exception as e:
+        # Top-level boundary for the UI: log the error and return a friendly message.
+        print(f"Error during generation: {e}")
+        bot_response = "Sorry, I encountered an error while processing your request."
+    return bot_response
 # --- 3. The Gradio Interface ---
 # This function handles the chat logic.
 def handle_chat(message, history):
+    """Answer ``message`` and return the chat history with the new turn added.
+
+    Args:
+        message: Text submitted by the user.
+        history: Existing list of (user, bot) turns; may be None or empty.
+
+    Returns:
+        A new list holding the previous turns plus (message, reply). When the
+        message is blank, the previous turns are returned unchanged.
+    """
+    # Work on a copy so the caller's list is not mutated, and tolerate a missing history.
+    turns = list(history) if history else []
+    # Blank input: skip the slow CPU generation entirely.
+    if not message or not message.strip():
+        return turns
+    # This is not a streaming call anymore.
+    # It will wait for the full response and then return it.
+    bot_response = get_response(message, turns)
+    turns.append((message, bot_response))
+    return turns
 # --- 4. Build the Gradio App ---
 # This is what your frontend will connect to.
 with gr.Blocks(theme=gr.themes.Base()) as demo:
+    gr.Markdown("# Healthify AI 🇳🇬 - Backend (Self-Contained)")
+    gr.Markdown("This backend runs its own model on a CPU.")
     chatbot = gr.Chatbot(label="Healthify AI Chat")
         )
         submit_button = gr.Button("Send", variant="primary", scale=1)
+    submit_button.click(handle_chat, [prompt_input, chatbot], chatbot, show_progress="full")
+    prompt_input.submit(handle_chat, [prompt_input, chatbot], chatbot, show_progress="full")
 demo.queue().launch()