|
|
|
|
|
"""CiPE_Streamlit |
|
|
|
|
|
Automatically generated by Colaboratory. |
|
|
|
|
|
Original file is located at |
|
|
https://colab.research.google.com/drive/1jACLFXfsdWM59lrfTQGcZVsTIHBO92R8 |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
# Dependency setup -- run once from a terminal, not from this script.
# The original lines used the notebook "!" shell escape, which is IPython-only
# syntax and a SyntaxError when this file is run by Python or Streamlit:
#
#   pip install langchain predictionguard lancedb html2text sentence-transformers PyPDF2
#   pip install huggingface_hub
#   pip install transformers
#   pip install sentencepiece
#   pip install streamlit
|
|
|
|
|
import os |
|
|
import urllib.request |
|
|
|
|
|
import html2text |
|
|
import predictionguard as pg |
|
|
from langchain import PromptTemplate, FewShotPromptTemplate |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import numpy as np |
|
|
import lancedb |
|
|
from lancedb.embeddings import with_embeddings |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
# SECURITY: the API token used to be hard-coded on this line. Credentials must
# not live in source control; supply PREDICTIONGUARD_TOKEN through the
# environment (or a secrets manager) and rotate the token that was committed.
# NOTE(review): presumably the predictionguard client reads this variable
# itself -- confirm against its documentation.
if not os.environ.get("PREDICTIONGUARD_TOKEN"):
    raise RuntimeError(
        "PREDICTIONGUARD_TOKEN is not set; export it in the environment "
        "before starting the app."
    )
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
|
|
|
|
|
|
|
|
|
|
# Patient demographics, collected from sidebar widgets.
# `name` is later passed to the answer functions as the patient name.
name = st.sidebar.text_input("Name")
age = st.sidebar.number_input("Age", min_value=0, max_value=120, step=1)
gender = st.sidebar.selectbox("Gender", ["Male", "Female", "Other"])
ethnicity = st.sidebar.text_input("Ethnicity")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Clinical details are grouped in one form; `submit_button` is the value
# returned by the form's submit widget and gates the prediction further down.
with st.form(key="patient_form"):
    disease = st.text_area("DISEASE", height=100)
    prescriptions = st.text_area("PRESCRIPTIONS", height=100)
    additional_info = st.text_area("ADDITIONAL INFO", height=100)
    submit_button = st.form_submit_button(label="Predict Drug Effects")
|
|
|
|
|
from PyPDF2 import PdfReader |
|
|
|
|
|
|
|
|
# Load the reference PDF and concatenate the text of every page, one page per
# line-terminated segment (each page's text is followed by "\n").
pdf_path = '/content/drug_side_effects_summary_cleaned.pdf'
reader = PdfReader(pdf_path)

# Build the string with a single join instead of repeated `+=`, and fall back
# to "" if a page yields no text so the concatenation cannot fail on None.
text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Debug preview of the extracted text (goes to stdout).
print(text[:500])
|
|
|
|
|
import re |
|
|
|
|
|
|
|
|
def clean_text(text):
    """Normalize extracted text into a single whitespace-collapsed line.

    Steps:
      1. Remove a hyphen immediately followed by a newline, joining the two
         halves of a word split across lines.
      2. Collapse every run of whitespace (including newlines) to one space.
      3. Strip leading and trailing whitespace.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned string.
    """
    text = re.sub(r'-\n', '', text)
    # `\s+` already matches newlines, so no separate newline replacement is
    # needed before collapsing whitespace.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
|
|
|
|
|
|
|
|
cleaned_text = clean_text(text)

# Bare preview expression carried over from the notebook; as plain Python it
# has no effect.
# NOTE(review): Streamlit's "magic" may render standalone expressions in the
# app -- confirm whether this preview is wanted before removing it.
cleaned_text[:500]
|
|
|
|
|
|
|
|
def chunk_text(text, chunk_size=700, overlap=50):
    """Split *text* into fixed-size chunks that overlap their predecessor.

    The first chunk starts at index 0; each following chunk starts
    ``chunk_size - overlap`` characters after the previous one, so consecutive
    chunks share ``overlap`` characters. A trailing chunk is emitted only if
    it contains at least one character beyond the previous chunk's end.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks; must
            be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings (empty when *text* is empty).

    Raises:
        ValueError: If ``chunk_size <= 0`` or ``overlap >= chunk_size``. With
            such values the start index never advances, which previously
            caused an endless loop.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    if not text:
        return []

    step = chunk_size - overlap
    chunks = [text[:chunk_size]]
    # Stopping at len(text) - overlap skips a final chunk that would lie
    # entirely inside the previous chunk's overlap region.
    chunks.extend(
        text[start:start + chunk_size]
        for start in range(step, len(text) - overlap, step)
    )
    return chunks
|
|
|
|
|
|
|
|
# Split the cleaned document into overlapping 700-character chunks.
docs_alternative = chunk_text(cleaned_text, chunk_size=700, overlap=50)

# Keep the first few chunks as a preview (a slice copes with short documents).
chunks_to_display_alt = 3
chunks_preview_alt = docs_alternative[:chunks_to_display_alt]

# Bare preview expression carried over from the notebook; no effect as plain
# Python.
# NOTE(review): Streamlit's "magic" may render it in the app -- confirm.
chunks_preview_alt

# Replace '#' with '-' in the preview chunks.
chunks_preview_alt = [x.replace('#', '-') for x in chunks_preview_alt]
|
|
|
|
|
|
|
|
# Sentence-embedding model used for both indexing and querying.
# The identifier lives in its own variable: assigning it to `name` would
# overwrite the patient name read from the sidebar, which is later passed to
# the answer functions.
embedding_model_name = "all-MiniLM-L12-v2"
model = SentenceTransformer(embedding_model_name)
|
|
|
|
|
|
|
|
def embed_batch(batch):
    """Embed a batch of sentences with the module-level `model`.

    Args:
        batch: An iterable of sentence strings.

    Returns:
        A list with one embedding per input sentence, in input order.
    """
    # Encode the whole batch in one call instead of one call (and one
    # progress bar) per sentence; iterating the result yields one vector per
    # sentence.
    return list(model.encode(list(batch), show_progress_bar=True))
|
|
|
|
|
def embed(sentence):
    """Return the embedding of a single sentence using the module-level `model`.

    Args:
        sentence: The text to embed (used here for search queries).

    Returns:
        Whatever ``model.encode`` returns for that sentence.
    """
    return model.encode(sentence)
|
|
|
|
|
|
|
|
# Local directory that backs the LanceDB database.
lancedb_dir = ".lancedb"
# exist_ok=True makes creation idempotent and avoids the check-then-create race.
os.makedirs(lancedb_dir, exist_ok=True)
uri = lancedb_dir
db = lancedb.connect(uri)
|
|
|
|
|
|
|
|
# One row per document chunk: its position and its text ('#' replaced by '-',
# as was done for the preview chunks).
# All chunks are indexed here; previously only the 3-chunk preview list was
# embedded, so retrieval could never see the rest of the document.
metadata = [
    [index, chunk.replace('#', '-')]
    for index, chunk in enumerate(docs_alternative)
]
doc_df = pd.DataFrame(metadata, columns=["chunk", "text"])

# Attach an embedding to every row of the "text" column.
data = with_embeddings(embed_batch, doc_df)

# mode="overwrite" lets the script be re-run (Streamlit re-executes it on
# every interaction) without failing because the table already exists.
# The rows are written once by create_table; adding `data` again afterwards
# would store every chunk twice.
db.create_table("pdf_data", data=data, mode="overwrite")
table = db.open_table("pdf_data")
|
|
|
|
|
|
|
|
|
|
|
# Sample retrieval against the freshly built table. The original ran this
# identical query twice in a row; once is enough.
message = "What are the side effects of doxycycline for treating Acne?"
results = table.search(embed(message)).limit(5).to_pandas()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parse the comma-separated prescriptions into clean drug names: surrounding
# whitespace is stripped and empty entries (e.g. from a trailing comma or an
# empty field) are dropped. `disease` is used as entered in the form.
drug_names = [drug.strip() for drug in prescriptions.split(',') if drug.strip()]
|
|
|
|
|
|
|
|
def rag_answer_drug_side_effects(name, drug_names, disease):
    """Answer a side-effects question using retrieved context and an LLM.

    Builds a question from the arguments, retrieves the closest passage from
    the module-level `table`, fills the instruction prompt and sends it to the
    "Neural-Chat-7B" completion model.

    Args:
        name: Patient name; used in the question and in the greeting.
        drug_names: A single drug name or an iterable of drug names.
        disease: The condition being treated.

    Returns:
        The ``text`` of the first completion choice.

    Raises:
        LookupError: If the vector search returns no passages.
    """
    # Render the drugs as plain comma-separated text rather than a Python
    # list repr such as "['a', ' b']".
    if isinstance(drug_names, str):
        drugs = drug_names.strip()
    else:
        drugs = ", ".join(str(drug).strip() for drug in drug_names)

    message = f"What are the potential side effects of using {drugs} for treating {disease}? Please provide a list of side effects specific to the use of these drugs in the context of the mentioned disease of {name} person."

    results = table.search(embed(message)).limit(10).to_pandas()
    if results.empty:
        raise LookupError("No reference passages were found for this question.")
    # Only the passage with the smallest `_distance` is used as context.
    context = results.sort_values(by="_distance", ascending=True)["text"].iloc[0]

    # The stray '"' line and the "medicaiton" typo of the original prompt are
    # removed/fixed here.
    template = (
        "### Instruction:\n"
        "Start with Hi, {name}. Then give a compassionate answer in bullet points and list.\n"
        "Read the below input context and respond with a mid length answer to the given question. If you cannot find an exact answer then look up something nearer to the medication and disease\n"
        "\n"
        "### Input:\n"
        "Context: {context}\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "### Response:\n"
    )

    # The template contains a {name} placeholder, so `name` must be supplied;
    # omitting it made every call fail with KeyError: 'name'.
    prompt = template.format(name=name, context=context, question=message)

    result = pg.Completion.create(
        model="Neural-Chat-7B",
        prompt=prompt,
    )
    return result['choices'][0]['text']
|
|
|
|
|
|
|
|
def rag_answer_drug_benfit_effects(name, drug_names, disease):
    """Answer a benefits question using retrieved context and an LLM.

    Builds a question from the arguments, retrieves the closest passage from
    the module-level `table`, fills the instruction prompt and sends it to the
    "Neural-Chat-7B" completion model.

    Args:
        name: Patient name; used in the question and in the greeting.
        drug_names: A single drug name or an iterable of drug names.
        disease: The condition being treated.

    Returns:
        The ``text`` of the first completion choice.

    Raises:
        LookupError: If the vector search returns no passages.
    """
    # Render the drugs as plain comma-separated text rather than a Python
    # list repr such as "['a', ' b']".
    if isinstance(drug_names, str):
        drugs = drug_names.strip()
    else:
        drugs = ", ".join(str(drug).strip() for drug in drug_names)

    message = f"What are the potential benefits of using {drugs} for treating {disease}? Please provide a list of benefits specific to the use of these drugs in the context of the mentioned disease of {name} person."

    results = table.search(embed(message)).limit(10).to_pandas()
    if results.empty:
        raise LookupError("No reference passages were found for this question.")
    # Only the passage with the smallest `_distance` is used as context.
    context = results.sort_values(by="_distance", ascending=True)["text"].iloc[0]

    # The stray '"' line and the "medicaiton" typo of the original prompt are
    # removed/fixed here.
    template = (
        "### Instruction:\n"
        "Start with Hi, {name}. Then give a compassionate answer in bullet points and list.\n"
        "Read the below input context and respond with a mid length answer to the given question. If you cannot find an exact answer then look up something nearer to the medication and disease\n"
        "\n"
        "### Input:\n"
        "Context: {context}\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "### Response:\n"
    )

    # The template contains a {name} placeholder, so `name` must be supplied;
    # omitting it made every call fail with KeyError: 'name'.
    prompt = template.format(name=name, context=context, question=message)

    result = pg.Completion.create(
        model="Neural-Chat-7B",
        prompt=prompt,
    )
    return result['choices'][0]['text']
|
|
|
|
|
|
|
|
# Run the prediction when the patient form is submitted.
# The separate st.button('Predict Drug Effects') handler was removed: it made
# the same two model calls but never displayed their results, and it
# duplicated the form's own submit button.
if submit_button:
    # Skip the model calls when there is nothing to ask about.
    if not disease.strip() or not any(str(drug).strip() for drug in drug_names):
        st.warning("Please enter a disease and at least one prescription.")
    else:
        try:
            side_effects_response = rag_answer_drug_side_effects(name, drug_names, disease)
            benefits_response = rag_answer_drug_benfit_effects(name, drug_names, disease)
            st.write("Side Effects:", side_effects_response)
            st.write("Benefits:", benefits_response)
        except Exception as e:
            # UI boundary: report the failure in the app instead of crashing.
            st.error(f"An error occurred: {e}")
|
|
|
|
|
from huggingface_hub import notebook_login, Repository

# NOTE(review): notebook_login() is a notebook-oriented login helper; confirm
# it is still wanted when this file runs as a Streamlit script.
notebook_login()

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face Hub repository id of the checkpoint; not used in the code shown.
checkpoint_path = r"filius-Dei/CiPE"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|