CletusOriakhi committed on
Commit 5ab07ae · verified · 1 Parent(s): 5b050ed

Update app.py

Files changed (1)
  1. app.py +58 -48
app.py CHANGED
@@ -1,71 +1,81 @@
- # app.py - FINAL "Smart Proxy" Backend
-
  import gradio as gr
- from gradio_client import Client, handle_file
- import os
-
- # --- 1. Define the NEW, MORE RELIABLE Public Model ---
- # We are switching to the official Zephyr 7B model from Hugging Face.
- PUBLIC_MODEL_API = "https://huggingface-projects-zephyr-7b-beta.hf.space/run/chat"
-
- # --- 2. The "Smart" Function ---
- # This is where we add your custom context.
- def get_smart_response(message, history):
-
-     # 1. Create the custom prompt
-     # This turns the generic model into your specialized one.
-     system_prompt = (
-         "You are Healthify AI, a helpful and compassionate medical assistant "
-         "providing information for users in Nigeria. Your answers should be "
-         "safe, clear, and relevant to the Nigerian context. Always include a "
-         "disclaimer that you are an AI and not a substitute for a real doctor."
      )
-
-     # 2. Format the history for the public model
-     history_formatted = []
-     for user_msg, bot_msg in history:
-         history_formatted.append([user_msg, bot_msg])
-
      try:
-         # 3. Connect to the public model
-         client = Client(PUBLIC_MODEL_API)
-
-         # 4. Call the public model with our special prompt
-         # This API takes the same arguments, so the code doesn't need to change.
-         result = client.predict(
-             message=message,
-             history=history_formatted,
-             system_prompt=system_prompt,  # Here is our custom instruction!
-             api_name="/chat"
          )
-
-         # 5. The public model returns the full history. We just need the last answer.
-         bot_response = result[-1][1]
-
      except Exception as e:
-         print(f"Error calling public API: {e}")
-         bot_response = "Sorry, I'm having trouble connecting to the AI model right now. Please try again in a moment."
-
-     # 6. Stream the final, high-quality answer back to the user
-     for char in bot_response:
-         yield char
-
  # --- 3. The Gradio Interface ---
  # This function handles the chat logic.
  def handle_chat(message, history):
-     history.append((message, ""))
-     # Get the full response by iterating through the generator
-     full_response = ""
-     for char in get_smart_response(message, history):
-         full_response += char
-         history[-1] = (message, full_response)
-         yield history
-
  # --- 4. Build the Gradio App ---
  # This is what your frontend will connect to.
  with gr.Blocks(theme=gr.themes.Base()) as demo:
-     gr.Markdown("# Healthify AI 🇳🇬 - Backend")
-     gr.Markdown("This backend is a proxy that adds Nigerian context to a public model.")
-
      chatbot = gr.Chatbot(label="Healthify AI Chat")

@@ -77,7 +87,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
      )
      submit_button = gr.Button("Send", variant="primary", scale=1)

-     submit_button.click(handle_chat, [prompt_input, chatbot], chatbot)
-     prompt_input.submit(handle_chat, [prompt_input, chatbot], chatbot)

      demo.queue().launch()
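For reference, the proxy being removed above is a thin gradio_client wrapper around a hosted Space. A minimal sketch of that pattern follows; the Space id and the /chat keyword arguments are assumptions carried over from the old code, and the Zephyr demo Space may have changed its API or gone offline since.

# Sketch of the removed "smart proxy" pattern via gradio_client.
# ASSUMPTION: the Space id and predict() argument names below mirror the
# old code above; this is not a verified, currently-working endpoint.
from gradio_client import Client

client = Client("huggingface-projects/zephyr-7b-beta")

result = client.predict(
    message="What are common malaria symptoms?",
    history=[],
    system_prompt="You are Healthify AI, a medical assistant for users in Nigeria.",
    api_name="/chat",
)
print(result)  # the return shape depends on the Space's endpoint definition

The commit's replacement, added below, drops the proxy entirely and runs a small model in-process.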
 
+ # app.py - FINAL Self-Contained Backend
+
  import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread
+
+ # --- 1. Load the Model and Tokenizer ---
+ # We are loading a small, fast model directly into our app.
+ # This will run on a free CPU Space.
+ print("Loading model... This may take a moment.")
+ tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
+ model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
+
+ # Set the pad token to the end-of-sequence token
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ print("Model loaded successfully!")
+
+ # --- 2. The Inference Function ---
+ # This function runs the model on the CPU.
+ def get_response(message, history):
+
+     # 1. Create a simple prompt.
+     prompt = (
+         "You are Healthify AI, a medical assistant. "
+         "Provide a clear and concise answer to the user's question.\n\n"
+         f"Question: {message}\n\nAnswer:"
      )
+
      try:
+         # 2. Tokenize the prompt
+         input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+
+         # 3. Generate the response
+         # Generation on a CPU is slow, so we limit the response length.
+         outputs = model.generate(
+             input_ids,
+             max_new_tokens=100,  # Keep this low for faster CPU responses
+             pad_token_id=tokenizer.eos_token_id,
+             do_sample=True,
+             top_p=0.9,
+             temperature=0.7
          )
+
+         # 4. Decode the full response
+         full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # 5. Extract *only* the answer part
+         # We find the "Answer:" marker and return everything after it.
+         answer_marker = "Answer:"
+         answer_start = full_response.find(answer_marker)
+         if answer_start != -1:
+             bot_response = full_response[answer_start + len(answer_marker):].strip()
+         else:
+             bot_response = "I'm not sure how to respond to that."  # Fallback
+
      except Exception as e:
+         print(f"Error during generation: {e}")
+         bot_response = "Sorry, I encountered an error while processing your request."
+
+     return bot_response

  # --- 3. The Gradio Interface ---
  # This function handles the chat logic.
  def handle_chat(message, history):
+     # This is no longer a streaming call; it waits for the full
+     # response and then returns it in one piece.
+     bot_response = get_response(message, history)
+     history.append((message, bot_response))
+     return history

  # --- 4. Build the Gradio App ---
  # This is what your frontend will connect to.
  with gr.Blocks(theme=gr.themes.Base()) as demo:
+     gr.Markdown("# Healthify AI 🇳🇬 - Backend (Self-Contained)")
+     gr.Markdown("This backend runs its own model on a CPU.")
+
      chatbot = gr.Chatbot(label="Healthify AI Chat")

@@ -77,7 +87,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
      )
      submit_button = gr.Button("Send", variant="primary", scale=1)

+     submit_button.click(handle_chat, [prompt_input, chatbot], chatbot, show_progress="full")
+     prompt_input.submit(handle_chat, [prompt_input, chatbot], chatbot, show_progress="full")

      demo.queue().launch()
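One loose end worth noting: the new file imports TextIteratorStreamer, Thread, and torch but never uses them, so the token-by-token streaming of the old backend is gone. If streaming were wanted back, the usual transformers pattern would look roughly like the sketch below. It reuses the module-level tokenizer and model from app.py; the helper name stream_response is hypothetical, not part of the commit.

# Hypothetical streaming variant using the otherwise-unused imports.
# ASSUMPTION: `tokenizer` and `model` are the distilgpt2 globals loaded above.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_response(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    # The streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() blocks, so run it in a background thread and drain the streamer here.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio re-renders the growing string on each yield

With this in place, handle_chat could yield updated histories instead of returning once, restoring the incremental output the previous version had.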