victorsconcious committed
Commit dd4f303 · verified · 1 Parent(s): 62a1fff

Update app.py

Files changed (1)
  1. app.py +34 -52
app.py CHANGED
@@ -1,63 +1,45 @@
+import os
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-
-import os
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 from huggingface_hub import login
 
+# --- Authenticate with HF token (from Spaces Secrets) ---
 login(os.environ["HF_TOKEN"])
 
-
-# Load MedGemma
-MODEL_NAME = "google/medgemma-4b-it"  # choose the model size you want here
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto")  # GPU if available
-
-# Function to generate response
-def medgemma_chat(prompt, max_length=200):
-    """
-    Generates medical responses from MedGemma.
-
-    Args:
-        prompt (str): Medical question, lab results, or patient info.
-        max_length (int): Max number of tokens to generate.
-
-    Returns:
-        str: AI-generated response.
-    """
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        output = model.generate(
-            **inputs,
-            max_length=max_length,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9
-        )
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    return response
+# --- Model setup ---
+MODEL_ID = "google/medgemma-4b-it"
+
+# 4-bit quantization config
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
+# Load model + tokenizer with quantization
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    quantization_config=bnb_config,
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+# --- Gradio app ---
+def medgemma_chat(prompt):
+    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+    return outputs[0]["generated_text"]
 
-# Gradio UI
-iface = gr.Interface(
+demo = gr.Interface(
     fn=medgemma_chat,
-    inputs=[
-        gr.Textbox(lines=5, placeholder="Enter patient's info, lab results, or medical question here...", label="Input")
-    ],
-    outputs=[
-        gr.Textbox(label="MedGemma Response")
-    ],
-    title="MedGemma Medical Assistant",
-    description=(
-        "Ask questions or provide patient information. "
-        "MedGemma generates medical insights, summaries, and guidance. "
-        "⚠️ For educational and research purposes only — not a substitute for professional medical advice."
-    ),
-    examples=[
-        ["Patient: 45-year-old male, BMI 28, blood pressure 140/90, glucose 7.5 mmol/L. Suggest possible conditions."],
-        ["Summarize the following lab report: Hemoglobin 11 g/dL, WBC 9 x10^9/L, Platelets 200 x10^9/L."]
-    ],
-    allow_flagging="never"
+    inputs=gr.Textbox(label="Enter medical question", lines=4, placeholder="e.g. What are symptoms of malaria?"),
+    outputs=gr.Textbox(label="MedGemma Response"),
+    title="🧠 MedGemma (4-bit Quantized)",
+    description="Ask medical questions (research/demo use only). Running in 4-bit quantized mode for efficiency."
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
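Note on the committed code: medgemma_chat passes raw text to an instruction-tuned model and returns the pipeline's full generated_text, which echoes the prompt back to the user. A minimal sketch of a possible follow-up (not part of this commit, reusing the same tokenizer and pipe objects) that applies the model's chat template and returns only the newly generated tokens:

# Hypothetical follow-up, not in this commit: wrap the user prompt in the
# model's chat template and strip the echoed prompt from the output.
def medgemma_chat(prompt):
    messages = [{"role": "user", "content": prompt}]
    templated = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    outputs = pipe(
        templated,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the model's continuation
    )
    return outputs[0]["generated_text"]

Separately, loading with BitsAndBytesConfig(load_in_4bit=True) assumes the Space lists bitsandbytes and accelerate in its requirements.txt and runs on GPU hardware; bitsandbytes 4-bit quantization generally does not work on the CPU-only tier.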