import tempfile
import zipfile
from typing import Optional
from pathlib import Path

from dataset import ImageDataset


def create_dataset_zip(
    dataset: ImageDataset, organize_in_folders: bool = True
) -> Optional[str]:
    """Create a zip file containing images and their text labels.

    Each entry of ``dataset.images`` is read as a mapping with a ``"path"``
    key (location of the image file on disk) and a ``"label"`` key (text
    stored in a ``.txt`` file that shares the image's base name).

    Entries whose image file does not exist are skipped silently. Any other
    per-entry error is printed and that entry is skipped. An entry is only
    added to the archive once both its image bytes and its encoded label are
    available, so a failing entry never leaves an image without its label.

    Args:
        dataset: ImageDataset containing images with labels
        organize_in_folders: If True, puts images in 'images/' folder and
            texts in 'labels/' folder. If False, puts all files in the
            root folder.

    Returns:
        str: Path to the created zip file, or None if dataset is empty or
        the archive could not be created. The caller is responsible for
        deleting the returned file. Note that a path is still returned when
        every individual entry was skipped.
    """
    if not dataset.images:
        return None

    # Reserve a unique ".zip" path. delete=False keeps the file on disk after
    # the handle is closed so that zipfile can reopen it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
        temp_path = temp_file.name

    try:
        with zipfile.ZipFile(temp_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for image_data in dataset.images:
                image_path = image_data["path"]
                label = image_data["label"]

                try:
                    # Determine archive member names based on the
                    # organization option.
                    image_zip_path = Path(image_path).name
                    text_zip_path = f"{Path(image_path).stem}.txt"
                    if organize_in_folders:
                        image_zip_path = f"images/{image_zip_path}"
                        text_zip_path = f"labels/{text_zip_path}"

                    # Prepare both payloads before touching the archive so
                    # that a failure here cannot leave a half-written pair.
                    with open(image_path, "rb") as img_file:
                        image_bytes = img_file.read()
                    label_bytes = label.encode("utf-8")

                    zip_file.writestr(image_zip_path, image_bytes)
                    zip_file.writestr(text_zip_path, label_bytes)
                except FileNotFoundError:
                    # Skip if image file doesn't exist
                    continue
                except Exception as e:
                    # Log error but continue with other files
                    print(f"Error processing {image_path}: {e}")
                    continue

        return temp_path
    except Exception as e:
        # Clean up temp file if creation failed
        Path(temp_path).unlink(missing_ok=True)
        print(f"Error creating zip file: {e}")
        return None