# Spaces: Sleeping
import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline
# (minimum positive percentage, stars), checked from the highest band down.
_STAR_THRESHOLDS = ((80, 5), (60, 4), (40, 3), (20, 2))


def calculate_star_rating(positive_percent: float) -> int:
    """Convert a positive-review percentage into a 1-5 star rating.

    Each 20-point band maps to one star: >= 80 is 5 stars, >= 60 is 4,
    >= 40 is 3, >= 20 is 2, and anything lower is 1.

    Args:
        positive_percent: Share of positive reviews, on a 0-100 scale.

    Returns:
        The star rating, an integer from 1 to 5.
    """
    for minimum, stars in _STAR_THRESHOLDS:
        if positive_percent >= minimum:
            return stars
    # Below every threshold (including values that compare false, e.g. NaN).
    return 1
# Markdown/HTML payloads live at module level so every line starts at column 0;
# markdown renders text indented by four or more spaces as a code block.
_CUSTOM_CSS = """
<style>
.reportview-container {
    background: #f0f2f6;
}
.stProgress > div > div > div > div {
    background-color: #4CAF50;
}
</style>
"""

_INSTRUCTIONS_MD = """
### Instructions:
1. Upload a CSV file containing movie reviews (must include a 'comment' column)
2. The system will automatically analyze the sentiment of each review
3. Generate overall ratings, keyphrase extraction, and summary reports
"""

# Character cap applied to the summarizer input. This is a cheap pre-trim only:
# the model's real limit is counted in tokens, which `truncation=True` enforces.
_SUMMARY_MAX_CHARS = 1024
_TOP_KEYPHRASES = 5

# NOTE(review): several labels below start with a lone "π". That looks like a
# mis-decoded emoji whose remaining bytes were lost, so the original glyph
# cannot be recovered from this file; those strings are left untouched.


@st.cache_resource(show_spinner=False)
def _load_models():
    """Build the sentiment, keyphrase and summarization pipelines.

    Streamlit re-executes the script on every widget interaction; caching the
    pipelines as a resource means the models are loaded once instead of on
    each rerun.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, keyphrase_extractor, summarizer


def _extract_comments(df):
    """Return the non-empty reviews of ``df['comment']`` as a list of strings."""
    # Blank CSV cells arrive as NaN (a float), which neither the pipelines nor
    # " ".join() accept, so drop them and coerce the rest to str.
    texts = df['comment'].dropna().astype(str)
    return [text for text in texts if text.strip()]


def _classify_comments(classifier, comments, progress_bar, status_text):
    """Run sentiment analysis on each comment, updating the progress widgets."""
    total = len(comments)
    results = []
    for i, comment in enumerate(comments):
        progress_bar.progress((i + 1) / total)
        status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")
        # truncation=True keeps reviews longer than the model's maximum
        # sequence length from raising inside the model.
        prediction = classifier(comment, truncation=True)[0]
        results.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score'],
        })
    return pd.DataFrame(results)


def _render_rating(result_df):
    """Show the star rating, positive count and positive ratio."""
    positive_count = int(result_df['sentiment'].sum())
    total_reviews = len(result_df)
    positive_percent = (positive_count / total_reviews) * 100
    star_rating = calculate_star_rating(positive_percent)

    st.success("Sentiment analysis completed!")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("⭐ Overall Rating", f"{star_rating} Stars")
    with col2:
        st.metric("π Positive Reviews", f"{positive_count}/{total_reviews}")
    with col3:
        st.metric("π Positive Ratio", f"{positive_percent:.1f}%")
    # Second progress bar, used here as a visual gauge of the positive ratio.
    st.progress(positive_percent / 100)


def _keyphrase_card_html(word, score):
    """Return the HTML card for one keyphrase, with the phrase escaped."""
    # The phrase originates from the uploaded CSV and is rendered with
    # unsafe_allow_html=True, so it must be escaped before interpolation.
    return (
        '<div style="border: 1px solid #ddd; border-radius: 5px; '
        'padding: 10px; text-align: center; margin: 5px; '
        'background-color: #add8e6;">'
        f"<b>{html.escape(str(word))}</b><br>"
        f"<small>Confidence: {score:.2f}</small>"
        "</div>"
    )


def _render_keyphrases(keyphrase_extractor, text):
    """Extract keyphrases from ``text`` and show the highest-scoring ones."""
    with st.spinner("Extracting keyphrases..."):
        keyphrases = keyphrase_extractor(text)
    top_keyphrases = sorted(
        keyphrases, key=lambda item: item['score'], reverse=True
    )[:_TOP_KEYPHRASES]

    st.markdown("**π Extracted Keyphrases:**")
    for column, phrase in zip(st.columns(_TOP_KEYPHRASES), top_keyphrases):
        column.markdown(
            _keyphrase_card_html(phrase['word'], phrase['score']),
            unsafe_allow_html=True,
        )


def _render_summary(summarizer, text):
    """Summarize the leading part of ``text`` and display the result."""
    with st.spinner("Generating review summary..."):
        summary = summarizer(
            text[:_SUMMARY_MAX_CHARS],
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
    st.markdown("**π Review Summary:**")
    st.info(summary[0]['summary_text'])


def main():
    """Run the Streamlit app: upload a review CSV, analyze it, show a report.

    The page loads the models, asks for a CSV with a 'comment' column and, when
    "Start Analysis" is pressed, shows per-review sentiment, an overall star
    rating, the top keyphrases, a summary and a download of the full results.
    Failures are reported in the page with ``st.error`` rather than raised.
    """
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
    st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            classifier, keyphrase_extractor, summarizer = _load_models()
        except Exception as e:
            st.error(f"Model loading failed: {e}")
            return

    st.title("🎬 Movie Review Batch Analysis System")
    st.markdown(_INSTRUCTIONS_MD)

    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
        if 'comment' not in df.columns:
            st.error("The CSV file must contain a 'comment' column")
            return
        comments = _extract_comments(df)
    except Exception as e:
        st.error(f"File reading failed: {e}")
        return

    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    if not comments:
        # Without this guard the statistics below would fail on an empty frame.
        st.error("The 'comment' column does not contain any non-empty reviews")
        return

    # NOTE(review): results are rendered only in the run triggered by this
    # button; keeping them in st.session_state would be a follow-up change.
    if not st.button("Start Analysis"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()
    try:
        result_df = _classify_comments(
            classifier, comments, progress_bar, status_text
        )
        _render_rating(result_df)

        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
            st.dataframe(result_df.head(10))

        st.subheader("π Keyphrase Extraction and Summary of Reviews")
        combined_text = " ".join(comments)
        _render_keyphrases(keyphrase_extractor, combined_text)
        _render_summary(summarizer, combined_text)

        # Serve the CSV from memory: no temporary file to write while open,
        # reopen, and remember to delete.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    except Exception as e:
        # UI boundary: surface the failure in the page instead of a traceback.
        st.error(f"An error occurred during analysis: {e}")
    finally:
        progress_bar.empty()
        status_text.empty()
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()