test2025SpL2

Sleeping

App Files Files Community

test2025SpL2 / app.py

ysuneu

Update app.py

4d65165 verified 8 months ago

raw

history blame contribute delete

7.48 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import tempfile
	import os

	def calculate_star_rating(positive_percent):
	    """Map a positive-review percentage onto a 1-5 star score.

	    Every 20-point band earns one more star: anything below 20 yields
	    1 star, and 80 or above yields 5 stars.
	    """
	    # (lower bound, stars) pairs, checked from the highest band downward.
	    bands = ((80, 5), (60, 4), (40, 3), (20, 2))
	    for floor, stars in bands:
	        if positive_percent >= floor:
	            return stars
	    # Nothing matched: the value sits in the lowest band.
	    return 1

	_SUMMARY_CHAR_LIMIT = 1024  # characters of combined review text handed to the summarizer
	_TOP_KEYPHRASES = 5  # number of keyphrase cards shown


	@st.cache_resource(show_spinner=False)
	def _load_models():
	    """Build the three Hugging Face pipelines once and reuse them.

	    Streamlit re-executes the script on every widget interaction; caching
	    the pipelines keeps them from being re-created on each rerun.

	    Returns:
	        Tuple ``(classifier, keyphrase_extractor, summarizer)``.
	    """
	    classifier = pipeline(
	        "text-classification",
	        model="KeonBlackwell/movie_sentiment_model",
	        tokenizer="distilbert-base-uncased",
	    )
	    keyphrase_extractor = pipeline(
	        "token-classification",
	        model="ml6team/keyphrase-extraction-distilbert-inspec",
	        aggregation_strategy="simple",
	    )
	    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	    return classifier, keyphrase_extractor, summarizer


	def _render_keyphrases(keyphrases):
	    """Show the highest-scoring keyphrases as a row of HTML cards."""
	    import html  # local import: only this helper needs it

	    ranked = sorted(keyphrases, key=lambda item: item["score"], reverse=True)
	    top = ranked[:_TOP_KEYPHRASES]

	    st.markdown("🔍 Extracted Keyphrases:")
	    if not top:
	        st.write("No keyphrases were extracted.")
	        return

	    card_style = (
	        "border: 1px solid #ddd; border-radius: 5px; padding: 10px; "
	        "text-align: center; margin: 5px; background-color: #add8e6;"
	    )
	    for column, phrase in zip(st.columns(_TOP_KEYPHRASES), top):
	        # The phrase text originates from the uploaded file, so escape it
	        # before injecting it into raw HTML.
	        word = html.escape(str(phrase["word"]))
	        column.markdown(
	            f'<div style="{card_style}">'
	            f"<b>{word}</b><br>"
	            f"<small>Confidence: {phrase['score']:.2f}</small>"
	            "</div>",
	            unsafe_allow_html=True,
	        )


	def _run_analysis(comments, classifier, keyphrase_extractor, summarizer):
	    """Score every review, then render rating, keyphrases, summary and download.

	    Args:
	        comments: Non-empty list of review strings.
	        classifier: Text-classification pipeline; label ``LABEL_1`` is
	            counted as a positive review.
	        keyphrase_extractor: Token-classification pipeline.
	        summarizer: Summarization pipeline.

	    Any exception raised during the analysis is reported with ``st.error``
	    instead of propagating.
	    """
	    progress_bar = st.progress(0)
	    status_text = st.empty()
	    total = len(comments)
	    results = []

	    try:
	        # Sentiment analysis, one review at a time so progress can be shown.
	        for position, comment in enumerate(comments, start=1):
	            progress_bar.progress(position / total)
	            status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")

	            # truncation=True keeps reviews longer than the model's maximum
	            # input length from raising inside the pipeline.
	            prediction = classifier(comment, truncation=True)[0]
	            results.append({
	                'comment': comment,
	                'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
	                'confidence': prediction['score'],
	            })

	        result_df = pd.DataFrame(results)

	        # Overall statistics.
	        positive_count = int(result_df['sentiment'].sum())
	        total_reviews = len(result_df)
	        positive_percent = positive_count / total_reviews * 100
	        star_rating = calculate_star_rating(positive_percent)

	        st.success("Sentiment analysis completed!")

	        col1, col2, col3 = st.columns(3)
	        with col1:
	            st.metric("⭐ Overall Rating", f"{star_rating} Stars")
	        with col2:
	            st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}")
	        with col3:
	            st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%")

	        # Positive ratio shown as a bar.
	        st.progress(positive_percent / 100)

	        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
	            st.dataframe(result_df.head(10))

	        st.subheader("📌 Keyphrase Extraction and Summary of Reviews")
	        combined_text = " ".join(comments)

	        with st.spinner("Extracting keyphrases..."):
	            keyphrases = keyphrase_extractor(combined_text)
	        _render_keyphrases(keyphrases)

	        with st.spinner("Generating review summary..."):
	            # Cap the input by characters (cheap), and let the tokenizer
	            # truncate as well in case the capped text is still too long.
	            summary_input = combined_text[:_SUMMARY_CHAR_LIMIT]
	            summary = summarizer(
	                summary_input,
	                max_length=130,
	                min_length=30,
	                do_sample=False,
	                truncation=True,
	            )

	        st.markdown("📝 Review Summary:")
	        st.info(summary[0]['summary_text'])

	        # Serve the CSV from memory: no temporary file to create or clean up.
	        st.download_button(
	            label="Download Full Results",
	            data=result_df.to_csv(index=False).encode("utf-8"),
	            file_name="analysis_results.csv",
	            mime="text/csv",
	        )
	    except Exception as e:
	        st.error(f"An error occurred during analysis: {e}")
	    finally:
	        progress_bar.empty()
	        status_text.empty()


	def main():
	    """Run the Streamlit movie-review batch analysis app.

	    Loads the models, asks for a CSV upload with a 'comment' column, and on
	    "Start Analysis" shows the overall star rating, the top keyphrases, a
	    summary of the reviews and a download button for per-review results.
	    Failures while loading models or reading the file are reported with
	    ``st.error`` and end the run early.
	    """
	    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")

	    # Custom styles. Built from single-line pieces so the emitted text does
	    # not depend on how this source file is indented.
	    st.markdown(
	        "<style>\n"
	        ".reportview-container {\n"
	        "background: #f0f2f6;\n"
	        "}\n"
	        ".stProgress > div > div > div > div {\n"
	        "background-color: #4CAF50;\n"
	        "}\n"
	        "</style>",
	        unsafe_allow_html=True,
	    )

	    # Model loading (instant after the first run thanks to the cache).
	    with st.spinner("Loading all models, this may take a few minutes..."):
	        try:
	            classifier, keyphrase_extractor, summarizer = _load_models()
	        except Exception as e:
	            st.error(f"Model loading failed: {e}")
	            return

	    # Page layout
	    st.title("🎬 Movie Review Batch Analysis System")
	    st.markdown(
	        "### Instructions:\n"
	        "1. Upload a CSV file containing movie reviews (must include a 'comment' column)\n"
	        "2. The system will automatically analyze the sentiment of each review\n"
	        "3. Generate overall ratings, keyphrase extraction, and summary reports\n"
	    )

	    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
	    if uploaded_file is None:
	        return

	    try:
	        df = pd.read_csv(uploaded_file)
	    except Exception as e:
	        st.error(f"File reading failed: {e}")
	        return

	    if 'comment' not in df.columns:
	        st.error("The CSV file must contain a 'comment' column")
	        return

	    # Empty cells are read as NaN and numeric cells as numbers; neither can
	    # be fed to the pipelines or joined as text, so drop/convert them here.
	    cleaned = df['comment'].dropna().astype(str).str.strip()
	    comments = [text for text in cleaned if text]
	    if not comments:
	        st.error("The 'comment' column does not contain any reviews to analyze")
	        return

	    with st.expander("Preview of Original Data (First 5 Rows)"):
	        st.dataframe(df.head())

	    if st.button("Start Analysis"):
	        _run_analysis(comments, classifier, keyphrase_extractor, summarizer)

	if __name__ == "__main__":
	    # Start the app only when this file is executed as a script,
	    # not when it is imported as a module.
	    main()