doozer21 committed on
Commit 2ce327e · verified · 1 Parent(s): cbdf1e6

Update app.py

Files changed (1)
  1. app.py +111 -192
app.py CHANGED
@@ -4,13 +4,10 @@
  # IMPROVEMENTS:
  # -------------
  # ✅ Mobile-friendly single-column layout
- # ✅ Fixed mobile upload issues with session state
- # ✅ Persistent predictions across reruns
- # ✅ Simplified, responsive CSS
- # ✅ Better error handling
- # ✅ Loads model from Hugging Face Hub OR local file
- # ✅ Optimized for slow connections
- # ✅ Touch-friendly interface
  #
  # ============================================================

@@ -21,7 +18,6 @@ from torchvision import transforms
  from PIL import Image
  import timm
  from pathlib import Path
- import hashlib

  # ============================================================
  # PAGE CONFIGURATION
@@ -34,37 +30,23 @@ st.set_page_config(
  initial_sidebar_state="collapsed"
  )

- # ============================================================
- # SESSION STATE INITIALIZATION
- # ============================================================
-
- if 'predictions' not in st.session_state:
- st.session_state.predictions = None
- if 'processed_image' not in st.session_state:
- st.session_state.processed_image = None
- if 'last_image_hash' not in st.session_state:
- st.session_state.last_image_hash = None
-
  # ============================================================
  # MINIMAL CSS (Mobile-First)
  # ============================================================

  st.markdown("""
  <style>
- /* Remove extra padding on mobile */
  .block-container {
  padding-top: 2rem;
  padding-bottom: 2rem;
  }

- /* Cleaner header */
  h1 {
  text-align: center;
  color: #FF6B6B;
  margin-bottom: 0.5rem;
  }

- /* Result cards */
  .prediction-card {
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  padding: 1.5rem;
@@ -85,14 +67,12 @@ st.markdown("""
  opacity: 0.9;
  }

- /* Confidence bars */
  .conf-bar {
  background: #f0f0f0;
  border-radius: 8px;
  height: 36px;
  margin: 0.5rem 0;
  overflow: hidden;
- position: relative;
  }

  .conf-fill {
@@ -105,16 +85,6 @@ st.markdown("""
  font-weight: 600;
  font-size: 0.95rem;
  }
-
- /* Make file uploader more visible */
- .stFileUploader {
- margin-bottom: 1rem;
- }
-
- /* Make camera input more visible */
- .stCameraInput {
- margin-top: 1rem;
- }
  </style>
  """, unsafe_allow_html=True)

@@ -145,52 +115,35 @@ FOOD_CLASSES = [
  "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare", "waffles"
  ]

- # ============================================================
- # HELPER FUNCTIONS
- # ============================================================
-
- def get_image_hash(image_bytes):
- """Create a hash of image bytes to detect if it's a new image."""
- return hashlib.md5(image_bytes).hexdigest()
-
  # ============================================================
  # MODEL LOADING
  # ============================================================

  @st.cache_resource
  def load_model():
- """
- Loads model from local file or Hugging Face Hub.
- Cached for performance across sessions.
- """
  try:
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

- # Try loading from local file first (for HF Spaces)
  local_path = Path("model1_best.pth")

  if local_path.exists():
- checkpoint = torch.load(local_path, map_location=device)
  else:
- # Fallback: try to download from HF Hub
  try:
  from huggingface_hub import hf_hub_download
  model_path = hf_hub_download(
  repo_id="doozer21/FoodVision",
  filename="model1_best.pth"
  )
- checkpoint = torch.load(model_path, map_location=device)
- except Exception as e:
- st.error("❌ Could not load model from local file or Hugging Face Hub")
- st.info("Make sure model1_best.pth is in your Space's repository")
  return None, None, None

- # Get config
  model_config = checkpoint.get('model_config', {
  'model_id': 'convnextv2_base.fcmae_ft_in22k_in1k_384'
  })

- # Create and load model
  model = timm.create_model(
  model_config['model_id'],
  pretrained=False,
@@ -202,7 +155,6 @@ def load_model():
  model.eval()

  accuracy = checkpoint.get('best_val_acc', 0)
-
  return model, device, accuracy

  except Exception as e:
@@ -251,6 +203,50 @@ def predict(model, image_tensor, device, top_k=3):

  return results

  # ============================================================
  # MAIN APP
  # ============================================================
@@ -260,14 +256,14 @@ def main():
  st.title("🍕 FoodVision AI")
  st.markdown("**Identify 101 food dishes instantly**")

- # Load model with status
- with st.spinner("🔄 Loading AI model..."):
- model, device, accuracy = load_model()

  if model is None:
  st.stop()

- # Show model info in expander (cleaner for mobile)
  with st.expander("ℹ️ Model Info"):
  st.write(f"**Architecture:** ConvNeXt V2 Base")
  st.write(f"**Accuracy:** {accuracy:.2f}%")
@@ -276,149 +272,72 @@ def main():

  st.markdown("---")

- # Single-column layout (mobile-friendly)
- st.subheader("📸 Upload or Take a Photo")
-
- # File uploader
- uploaded_file = st.file_uploader(
- "Choose a food image",
- type=['jpg', 'jpeg', 'png', 'webp'],
- key="file_uploader"
- )
-
- # Camera input (below uploader)
- st.markdown("**Or use your camera:**")
- camera_photo = st.camera_input(
- "Take a picture",
- key="camera_input"
- )
-
- # Determine which image to use
- image_source = None
- source_name = ""
- image_bytes = None

- if camera_photo is not None:
- image_source = camera_photo
- source_name = "camera"
- image_bytes = camera_photo.getvalue()
- elif uploaded_file is not None:
- image_source = uploaded_file
- source_name = "upload"
- image_bytes = uploaded_file.getvalue()

- # Process image if we have one
- if image_source is not None and image_bytes is not None:
- try:
- # Check if this is a new image
- current_hash = get_image_hash(image_bytes)
-
- # Only process if it's a new image
- if current_hash != st.session_state.last_image_hash:
- # Load image
- image = Image.open(image_source)
-
- # Store image in session state
- st.session_state.processed_image = image
- st.session_state.last_image_hash = current_hash

- # Show loading indicator
- with st.spinner("🧠 Analyzing your food..."):
- # Preprocess and predict
  img_tensor = preprocess_image(image)
  predictions = predict(model, img_tensor, device, top_k=3)
-
- # Store predictions in session state
- st.session_state.predictions = predictions
-
- # Display results (from session state)
- if st.session_state.processed_image is not None:
- # Show image preview
- st.image(
- st.session_state.processed_image,
- caption=f"Image from {source_name}",
- use_column_width=True
- )
-
- if st.session_state.predictions is not None:
- st.markdown("---")

- # Display top prediction prominently
- top_food, top_conf = st.session_state.predictions[0]

- st.markdown(f"""
- <div class="prediction-card">
- <h2>🏆 {top_food}</h2>
- <h3>{top_conf:.1f}% Confidence</h3>
- </div>
- """, unsafe_allow_html=True)
-
- # Show all top-3 predictions
- st.markdown("### 📊 Top 3 Predictions")

- for i, (food, conf) in enumerate(st.session_state.predictions, 1):
- emoji = "🥇" if i == 1 else "🥈" if i == 2 else "🥉"
-
- st.markdown(f"**{emoji} {food}**")
- st.markdown(f"""
- <div class="conf-bar">
- <div class="conf-fill" style="width: {conf}%">
- {conf:.1f}%
- </div>
- </div>
- """, unsafe_allow_html=True)

- # Feedback based on confidence
- st.markdown("---")
- if top_conf > 90:
- st.success("🎉 **Very confident!** The model is very sure about this prediction.")
- elif top_conf > 70:
- st.success("👍 **Good confidence!** This looks like a solid prediction.")
- elif top_conf > 50:
- st.warning("🤔 **Moderate confidence.** The food might be ambiguous or partially visible.")
- else:
- st.warning("😕 **Low confidence.** Try a clearer photo with better lighting.")

- # Add a clear button to reset
- if st.button("🔄 Analyze Another Image", use_container_width=True):
- st.session_state.predictions = None
- st.session_state.processed_image = None
- st.session_state.last_image_hash = None
- st.rerun()
-
- except Exception as e:
- st.error(f"❌ Error: {str(e)}")
- st.info("Try a different image or check if the file is corrupted")
-
- # Reset state on error
- st.session_state.predictions = None
- st.session_state.processed_image = None
- st.session_state.last_image_hash = None

- else:
- # Instructions (only show if no predictions)
- if st.session_state.predictions is None:
- st.info("👆 Upload a food image or take a photo to get started!")
-
- with st.expander("💡 Tips for Best Results"):
- st.markdown("""
- - Use clear, well-lit photos
- - Make sure food is the main subject
- - Avoid heavily filtered images
- - Try different angles if confidence is low
- - Works best with common dishes
- """)
-
- with st.expander("🍽️ What can it recognize?"):
- st.markdown("""
- The model can identify **101 popular dishes** including:
- - 🍕 Pizza, Pasta, Burgers
- - 🍣 Sushi, Ramen, Pad Thai
- - 🥗 Salads, Sandwiches
- - 🍰 Desserts (cakes, ice cream, etc.)
- - 🍳 Breakfast foods
- - And many more!
- """)

  # Footer
  st.markdown("---")
 
  # IMPROVEMENTS:
  # -------------
  # ✅ Mobile-friendly single-column layout
+ # ✅ SIMPLIFIED: No session state complexity
+ # ✅ Direct processing on every upload
+ # ✅ Works reliably on mobile
+ # ✅ No unnecessary buttons
  #
  # ============================================================
 
  from PIL import Image
  import timm
  from pathlib import Path

  # ============================================================
  # PAGE CONFIGURATION

  initial_sidebar_state="collapsed"
  )

  # ============================================================
  # MINIMAL CSS (Mobile-First)
  # ============================================================

  st.markdown("""
  <style>
  .block-container {
  padding-top: 2rem;
  padding-bottom: 2rem;
  }

  h1 {
  text-align: center;
  color: #FF6B6B;
  margin-bottom: 0.5rem;
  }

  .prediction-card {
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  padding: 1.5rem;

  opacity: 0.9;
  }

  .conf-bar {
  background: #f0f0f0;
  border-radius: 8px;
  height: 36px;
  margin: 0.5rem 0;
  overflow: hidden;
  }

  .conf-fill {

  font-weight: 600;
  font-size: 0.95rem;
  }
  </style>
  """, unsafe_allow_html=True)
 
  "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare", "waffles"
  ]

  # ============================================================
  # MODEL LOADING
  # ============================================================

  @st.cache_resource
  def load_model():
+ """Loads model from local file or Hugging Face Hub."""
  try:
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  local_path = Path("model1_best.pth")

  if local_path.exists():
+ checkpoint = torch.load(local_path, map_location=device, weights_only=False)
  else:
  try:
  from huggingface_hub import hf_hub_download
  model_path = hf_hub_download(
  repo_id="doozer21/FoodVision",
  filename="model1_best.pth"
  )
+ checkpoint = torch.load(model_path, map_location=device, weights_only=False)
+ except Exception:
  return None, None, None

  model_config = checkpoint.get('model_config', {
  'model_id': 'convnextv2_base.fcmae_ft_in22k_in1k_384'
  })

  model = timm.create_model(
  model_config['model_id'],
  pretrained=False,

  model.eval()

  accuracy = checkpoint.get('best_val_acc', 0)
  return model, device, accuracy

  except Exception as e:
 
  return results

+ # ============================================================
+ # DISPLAY RESULTS
+ # ============================================================
+
+ def display_results(predictions):
+ """Display prediction results."""
+ st.markdown("---")
+
+ # Top prediction
+ top_food, top_conf = predictions[0]
+
+ st.markdown(f"""
+ <div class="prediction-card">
+ <h2>🏆 {top_food}</h2>
+ <h3>{top_conf:.1f}% Confidence</h3>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Top 3 predictions
+ st.markdown("### 📊 Top 3 Predictions")
+
+ for i, (food, conf) in enumerate(predictions, 1):
+ emoji = "🥇" if i == 1 else "🥈" if i == 2 else "🥉"
+
+ st.markdown(f"**{emoji} {food}**")
+ st.markdown(f"""
+ <div class="conf-bar">
+ <div class="conf-fill" style="width: {conf}%">
+ {conf:.1f}%
+ </div>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Feedback
+ st.markdown("---")
+ if top_conf > 90:
+ st.success("🎉 **Very confident!** The model is very sure.")
+ elif top_conf > 70:
+ st.success("👍 **Good confidence!** Solid prediction.")
+ elif top_conf > 50:
+ st.warning("🤔 **Moderate confidence.** Food might be ambiguous.")
+ else:
+ st.warning("😕 **Low confidence.** Try a clearer photo.")
+
  # ============================================================
  # MAIN APP
  # ============================================================
 
  st.title("🍕 FoodVision AI")
  st.markdown("**Identify 101 food dishes instantly**")

+ # Load model
+ model, device, accuracy = load_model()

  if model is None:
+ st.error("❌ Could not load model. Check if model1_best.pth exists.")
  st.stop()

+ # Model info
  with st.expander("ℹ️ Model Info"):
  st.write(f"**Architecture:** ConvNeXt V2 Base")
  st.write(f"**Accuracy:** {accuracy:.2f}%")

  st.markdown("---")

+ # Input section
+ st.subheader("📸 Choose Your Input Method")

+ # Tab-based approach (better for mobile)
+ tab1, tab2 = st.tabs(["📁 Upload Image", "📷 Take Photo"])

+ with tab1:
+ uploaded_file = st.file_uploader(
+ "Select a food image",
+ type=['jpg', 'jpeg', 'png', 'webp'],
+ label_visibility="collapsed"
+ )
+
+ if uploaded_file is not None:
+ try:
+ image = Image.open(uploaded_file)
+ st.image(image, caption="Uploaded Image", use_column_width=True)

+ with st.spinner("🧠 Analyzing..."):
  img_tensor = preprocess_image(image)
  predictions = predict(model, img_tensor, device, top_k=3)

+ display_results(predictions)

+ except Exception as e:
+ st.error(f"❌ Error: {str(e)}")
+
+ with tab2:
+ camera_photo = st.camera_input("Take a picture", label_visibility="collapsed")
+
+ if camera_photo is not None:
+ try:
+ image = Image.open(camera_photo)
+ st.image(image, caption="Camera Photo", use_column_width=True)

+ with st.spinner("🧠 Analyzing..."):
+ img_tensor = preprocess_image(image)
+ predictions = predict(model, img_tensor, device, top_k=3)

+ display_results(predictions)

+ except Exception as e:
+ st.error(f"❌ Error: {str(e)}")

+ # Instructions (show at bottom when no image)
+ if uploaded_file is None and camera_photo is None:
+ st.info("👆 Choose a tab above to get started!")
+
+ with st.expander("💡 Tips for Best Results"):
+ st.markdown("""
+ - Use clear, well-lit photos
+ - Make sure food is the main subject
+ - Avoid heavily filtered images
+ - Try different angles if confidence is low
+ """)
+
+ with st.expander("🍽️ What can it recognize?"):
+ st.markdown("""
+ **101 popular dishes** including:
+ - 🍕 Pizza, Pasta, Burgers
+ - 🍣 Sushi, Ramen, Pad Thai
+ - 🥗 Salads, Sandwiches
+ - 🍰 Desserts (cakes, ice cream)
+ - 🍳 Breakfast foods
+ - And many more!
+ """)

  # Footer
  st.markdown("---")
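Both new torch.load calls pass weights_only=False. A short sketch of the motivation, assuming a recent PyTorch (2.6 or later, where the default flipped to weights_only=True) and the checkpoint keys this diff already references; it only runs where model1_best.pth is actually present:

import torch

# With weights_only=True (the PyTorch 2.6+ default), torch.load refuses
# checkpoints that pickle arbitrary Python objects, such as a config dict
# stored next to the state_dict. weights_only=False restores full unpickling,
# which is why the app sets it explicitly; only use it on trusted files.
checkpoint = torch.load("model1_best.pth", map_location="cpu", weights_only=False)
print(checkpoint.get("model_config"), checkpoint.get("best_val_acc"))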