Update new_approach/spa_ensemble.py

new_approach/spa_ensemble.py (+20 -10)
```diff
@@ -32,8 +32,11 @@ class FeatureExtractor:
             features.update({f'color_{channel}_mean': float(np.mean(ch)), f'color_{channel}_std': float(np.std(ch)), f'color_{channel}_skew': float(stats.skew(ch)), f'color_{channel}_min': float(np.min(ch)), f'color_{channel}_max': float(np.max(ch))})
         else:
             features.update({f'color_{channel}_mean': 0.0, f'color_{channel}_std': 0.0, f'color_{channel}_skew': 0.0, f'color_{channel}_min': 0.0, f'color_{channel}_max': 0.0})
-        hist, _ = np.histogram(ch, bins=3, range=(0, 256)); hist = hist / (hist.sum() + 1e-8)
-        for j, v in enumerate(hist): features[f'color_{channel}_hist_bin{j}'] = float(v)
+
+        # --- FIX: Removed Histogram extraction (9 features) to match the 40 features expected by your .pth files ---
+        # hist, _ = np.histogram(ch, bins=3, range=(0, 256)); hist = hist / (hist.sum() + 1e-8);
+        # for j, v in enumerate(hist): features[f'color_{channel}_hist_bin{j}'] = float(v)
+
         try:
             hsv = cv2.cvtColor(img_np, cv2.COLOR_RGB2HSV)
             features.update({'color_hue_mean': float(np.mean(hsv[:, :, 0])), 'color_saturation_mean': float(np.mean(hsv[:, :, 1])), 'color_value_mean': float(np.mean(hsv[:, :, 2]))})
```
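This hunk trades nine histogram features for a fixed 40-feature contract, and a mismatch here only surfaces later as a matrix-multiply error deep inside the model. A sanity check right after extraction fails fast instead. This is a minimal sketch, not part of the patch; `extract(img)` is a hypothetical method standing in for whatever the `FeatureExtractor` API actually exposes:

```python
import numpy as np

EXPECTED_NUM_FEATURES = 40  # what the trained .pth checkpoints and the scaler expect

def features_to_vector(extractor, img):
    """Extract handcrafted features and fail fast on a count mismatch."""
    features = extractor.extract(img)  # hypothetical API; adapt to the real method name
    vec = np.array([features[k] for k in sorted(features)], dtype=np.float32)
    if vec.shape[0] != EXPECTED_NUM_FEATURES:
        raise ValueError(
            f"got {vec.shape[0]} handcrafted features, expected {EXPECTED_NUM_FEATURES}; "
            "check for stray histogram bins"
        )
    return vec
```

Sorting the keys also pins a deterministic feature order, which matters because the scaler and the checkpoints were fit against one specific ordering.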
```diff
@@ -136,18 +139,22 @@ class BioCLIP2ZeroShot:
         prototypes = self.text_features_prototypes
         try: logit_scale = self.model.logit_scale.exp()
         except: logit_scale = torch.tensor(100.0).to(self.device)
-        logits = (logit_scale * image_features @ prototypes.T).cpu().numpy().squeeze()
+        # --- FIX: Added .detach() before .numpy() ---
+        logits = (logit_scale * image_features @ prototypes.T).detach().cpu().numpy().squeeze()
         return logits
 
 class EnsembleClassifier(nn.Module):
-    def __init__(self, num_handcrafted_features=49, dinov2_dim=1024, bioclip2_dim=100,
-                 num_classes=100, hidden_dim=512, dropout_rate=0.3, prototype_dim=
+    def __init__(self, num_handcrafted_features=40, dinov2_dim=1024, bioclip2_dim=100,
+                 num_classes=100, hidden_dim=512, dropout_rate=0.3, prototype_dim=768):
         super().__init__()
         self.dinov2_proj = nn.Sequential(nn.Linear(dinov2_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout_rate))
+
+        # --- FIX: Removed 3rd layer to match training checkpoint (Size mismatch error) ---
         self.handcraft_branch = nn.Sequential(
             nn.Linear(num_handcrafted_features, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(dropout_rate),
-            nn.Linear(128, hidden_dim // 2), nn.BatchNorm1d(hidden_dim // 2), nn.ReLU(), nn.Dropout(dropout_rate)
-
+            nn.Linear(128, hidden_dim // 2), nn.BatchNorm1d(hidden_dim // 2), nn.ReLU(), nn.Dropout(dropout_rate)
+        )
+
         self.bioclip2_branch = nn.Sequential(
             nn.Linear(bioclip2_dim, hidden_dim // 4), nn.BatchNorm1d(hidden_dim // 4), nn.ReLU(), nn.Dropout(dropout_rate * 0.5))
         fusion_input_dim = hidden_dim + hidden_dim // 2 + hidden_dim // 4
```
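The `.detach()` fix addresses a general PyTorch rule rather than anything specific to this model: `.numpy()` refuses to convert a tensor that is still attached to the autograd graph, and since `logit_scale` comes from a trainable parameter, the logits require grad. A standalone sketch of the failure mode and two equivalent fixes (illustrative shapes, not the repo's code):

```python
import torch

logit_scale = torch.nn.Parameter(torch.tensor(4.6052))  # ~log(100); requires grad
image_features = torch.randn(1, 768)
prototypes = torch.randn(100, 768)

logits = logit_scale.exp() * image_features @ prototypes.T
# logits.cpu().numpy()  # RuntimeError: Can't call numpy() on Tensor that requires grad

# Fix 1: detach from the graph before converting (what this patch does)
out = logits.detach().cpu().numpy().squeeze()

# Fix 2: compute under no_grad so nothing in the expression requires grad
with torch.no_grad():
    out = (logit_scale.exp() * image_features @ prototypes.T).cpu().numpy().squeeze()
```

At inference time `torch.no_grad()` is the slightly better habit, since it also skips building the graph, but `.detach()` is the minimal local fix.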
```diff
@@ -204,7 +211,8 @@ class ModelManager:
             print(f"Warning: Could not download scaler from {self.REPO_ID}: {e}.")
             print("Using dummy scaler (predictions may be inaccurate).")
             self.scaler = StandardScaler()
-            self.scaler.fit(np.zeros((1, 49)))
+            # FIX: Fit on 40 zeros instead of 49 to match the feature reduction
+            self.scaler.fit(np.zeros((1, 40)))
 
         # 4. Download & Load Ensemble Models
         self.models = []
```
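Fitting the dummy scaler on a single all-zero row is a compact way to get an identity fallback: the fitted mean is 0, and scikit-learn clamps the zero variance to a scale of 1, so `transform` passes features through unchanged. The width still has to be 40, or `transform` rejects inputs with a shape error. A short check of that behaviour (illustrative only):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(np.zeros((1, 40)))  # mean_ == 0; zero variance is clamped to scale_ == 1

x = np.random.rand(3, 40)
assert np.allclose(scaler.transform(x), x)  # identity: features pass through unscaled
assert np.allclose(scaler.mean_, 0.0) and np.allclose(scaler.scale_, 1.0)
```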
```diff
@@ -219,9 +227,11 @@ class ModelManager:
             model_path = hf_hub_download(repo_id=self.REPO_ID, filename=filename)
 
             # Load
+            # FIX: Passed num_handcrafted_features=40 and prototype_dim=768 to match weights
             model = EnsembleClassifier(
-                num_handcrafted_features=49, dinov2_dim=1024, bioclip2_dim=self.num_classes,
-                num_classes=self.num_classes, hidden_dim=hidden_dims[i], dropout_rate=dropout_rates[i]
+                num_handcrafted_features=40, dinov2_dim=1024, bioclip2_dim=self.num_classes,
+                num_classes=self.num_classes, hidden_dim=hidden_dims[i], dropout_rate=dropout_rates[i],
+                prototype_dim=768
             )
             state_dict = torch.load(model_path, map_location=self.device)
             model.load_state_dict(state_dict)
```
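When constructor arguments drift from the training run, `load_state_dict` raises one dense error string listing every size mismatch at once. A small diagnostic helper (hypothetical, not part of this patch) prints checkpoint-vs-model shapes side by side, which is how mismatches like 49-vs-40 input features show up at a glance:

```python
import torch

def report_state_dict_mismatches(model: torch.nn.Module, checkpoint_path: str) -> None:
    """Print shape mismatches between a checkpoint and a freshly built model."""
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model_state = model.state_dict()
    for name, tensor in checkpoint.items():
        if name not in model_state:
            print(f"unexpected key in checkpoint: {name}")
        elif model_state[name].shape != tensor.shape:
            print(f"{name}: checkpoint {tuple(tensor.shape)} vs model {tuple(model_state[name].shape)}")
    for name in model_state.keys() - checkpoint.keys():
        print(f"missing from checkpoint: {name}")
```

Running it before `model.load_state_dict(state_dict)` on each ensemble member turns the wall of error text into a short, readable report.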