Update app.py
app.py CHANGED
@@ -9,30 +9,9 @@ from nltk.tokenize import sent_tokenize
 from newspaper import Article
 
 from sentence_transformers import SentenceTransformer, util
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import torch
 
-# --- Download GGUF model from Hugging Face Hub at startup (if not present) ---
-from huggingface_hub import hf_hub_download
-
-GGUF_FILENAME = "gemma-3b-it-Q4_K_M.gguf"
-GGUF_REPO = "unsloth/gemma-3n-E4B-it-GGUF"
-print("Checking for GGUF model...")
-gguf_path = hf_hub_download(
-    repo_id=GGUF_REPO,
-    filename=GGUF_FILENAME,
-    cache_dir="./"
-)
-print(f"GGUF model path: {gguf_path}")
-
-# Load Llama GGUF model via llama-cpp-python
-from llama_cpp import Llama
-llm = Llama(
-    model_path=gguf_path,
-    n_ctx=2048,
-    n_threads=4  # or set to number of CPU cores
-)
-
 # --------- App settings ---------
 PUBMED_N = 100  # Number of abstracts to retrieve initially
 TOP_ABSTRACTS = 10  # Number of top semantic abstracts to keep per claim
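Note on the removed block: the old startup path fetched a GGUF checkpoint and served it with llama-cpp-python. The removed constants look inconsistent (`GGUF_FILENAME` names a gemma-3b file while `GGUF_REPO` points at a gemma-3n repo), which may explain why this path was dropped. The matching call site removed later in this diff is only partially legible, so here is a minimal, hypothetical sketch of how such a `Llama` object is usually invoked; the prompt and parameter values are placeholders, not the removed code:

```python
from llama_cpp import Llama

llm = Llama(model_path="gemma-3b-it-Q4_K_M.gguf", n_ctx=2048, n_threads=4)

# llama-cpp-python's __call__ runs a completion and returns an
# OpenAI-style dict; echo=False keeps the prompt out of the output.
result = llm(
    "Summarize the evidence for the claim: ...",  # placeholder prompt
    max_tokens=128,    # placeholder value
    temperature=0.1,   # placeholder value
    echo=False,
)
print(result["choices"][0]["text"].strip())
```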
@@ -70,6 +49,16 @@ nli_tokenizer = AutoTokenizer.from_pretrained(NLI_MODEL_NAME)
 nli_model = AutoModelForSequenceClassification.from_pretrained(NLI_MODEL_NAME)
 sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
 
+# --- Load fast Llama-3.2-1B-Instruct summarizer pipeline ---
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto",
+    max_new_tokens=128,
+)
+
 def extract_claims_pattern(article_text):
     sentences = sent_tokenize(article_text)
     claims = [
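The replacement summarizer goes through a plain `transformers` text-generation pipeline instead. Two practical notes: `meta-llama/Llama-3.2-1B-Instruct` is a gated checkpoint, so the Space needs an approved access token (for example an `HF_TOKEN` secret), and calling the pipeline with a list of chat messages relies on the model's chat template. A minimal sketch of that call pattern, assuming a recent `transformers` release with chat support:

```python
from transformers import pipeline

pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one sentence."},
]
outputs = pipe(messages, max_new_tokens=32)

# With chat-style input, generated_text is the full conversation,
# with the assistant's reply appended as the last message.
print(outputs[0]["generated_text"][-1]["content"])
```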
@@ -144,23 +133,29 @@ def extract_evidence_nli(claim, title, abstract):
 def summarize_evidence_llm(claim, evidence_list):
     support = [ev['sentence'] for ev in evidence_list if ev['label'] == 'ENTAILMENT']
     contradict = [ev['sentence'] for ev in evidence_list if ev['label'] == 'CONTRADICTION']
-
-
-
-
-        "
-
-
+
+    # Compose prompt for summarization.
+    messages = [
+        {"role": "system", "content": "You are a helpful biomedical assistant. Summarize scientific evidence in plain English for the general public."},
+        {"role": "user", "content":
+            f"Claim: {claim}\n"
+            f"Supporting evidence:\n" + ("\n".join(support) if support else "None") + "\n"
+            f"Contradicting evidence:\n" + ("\n".join(contradict) if contradict else "None") + "\n"
+            "Explain to a layperson: Is this claim likely true, false, or uncertain based on the evidence above? Give a brief and simple explanation in 2-3 sentences."
+        }
+    ]
     try:
-
-
-
-
-            temperature=0.
-            echo=False
+        outputs = pipe(
+            messages,
+            max_new_tokens=96,
+            do_sample=False,
+            temperature=0.1,
+        )
-
-
+        out = outputs[0]["generated_text"]
+        # If the model returns all messages, just take the last message (often the answer).
+        if isinstance(out, list) and "content" in out[-1]:
+            return out[-1]["content"].strip()
+        return out.strip()
     except Exception as e:
         return f"Summary could not be generated: {e}"
 
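Two details worth flagging in the new `summarize_evidence_llm`: with `do_sample=False` the pipeline decodes greedily, so the `temperature=0.1` argument has no effect (recent `transformers` versions warn that it is ignored), and the shape of `generated_text` depends on the input, which is why the code branches on `isinstance(out, list)`. A small illustration of the two shapes, with made-up output text:

```python
# Chat-style input: generated_text is the message list, reply last.
outputs = [{"generated_text": [
    {"role": "user", "content": "Claim: ..."},
    {"role": "assistant", "content": "The evidence suggests ..."},  # illustrative
]}]
out = outputs[0]["generated_text"]
assert isinstance(out, list) and "content" in out[-1]
print(out[-1]["content"].strip())

# Plain string input: generated_text is just a string.
outputs = [{"generated_text": "The evidence suggests ..."}]  # illustrative
print(outputs[0]["generated_text"].strip())
```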
@@ -225,7 +220,7 @@ This app extracts key scientific claims from a news article, finds the most rele
 3. For each claim, you will see:<br>
 - A plain summary of what research says.<br>
 - Color-coded evidence sentences (green=support, red=contradict, gray=neutral).<br>
--
+- The titles of the most relevant PubMed articles.<br><br>
 <b>Everything is 100% open source and runs on this website—no personal info or cloud API needed.</b>
 """
 