Metal3d committed on
Commit
6904fc7
·
1 Parent(s): 52e44f1

This is almost good

Browse files
Files changed (5) hide show
  1. .gitignore +6 -0
  2. app.py +171 -92
  3. dataset.py +2 -0
  4. documentation.py +217 -0
  5. packager.py +109 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ *.py[cod]
2
+ __pycache__/
3
+ .mypy_cache/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .pdm-python
app.py CHANGED
@@ -4,17 +4,23 @@ from PIL import Image
4
 
5
  from dataset import ImageDataset
6
  from labelizer import get_task_response
 
 
7
 
8
 
9
- def auto_label(image):
10
  text = get_task_response("<MORE_DETAILED_CAPTION>", image)
11
- return image, text
 
12
 
13
 
14
- def auto_label_and_update(dataset, im, image_id):
15
- """Generate label and return updated dataset."""
16
- text = get_task_response("<MORE_DETAILED_CAPTION>", im)
17
- return dataset.update_label(image_id, text)
 
 
 
18
 
19
 
20
  def uploaded(files, current_dataset):
@@ -30,109 +36,152 @@ def labelize_all_images(dataset, label, progress=gr.Progress(True)):
30
  # Generate actual labels
31
  labels_dict = {}
32
  for imdata in progress.tqdm(dataset.images):
33
- text = get_task_response("<MORE_DETAILED_CAPTION>", Image.open(imdata["path"]))
34
- print(text)
35
- labels_dict[imdata["id"]] = text
36
 
37
  return dataset.update_all_labels(labels_dict), label
38
 
39
 
40
- def update_buttons_states(dataset):
 
 
 
 
 
 
 
41
  count = len(dataset.images)
42
  return (
43
- gr.update(interactive=count > 0), # remove all
44
- gr.update(interactive=count > 0), # label all
 
 
 
 
45
  )
46
 
47
 
48
- CSS = """
49
- .label_image_box {
50
- border-radius: 1rem;
51
- background: var(--panel-background-fill);
52
- .image-frame img {
53
- height: 300px;
54
- max-height: 300px;
55
- }
56
- }
57
- """
58
 
59
- with gr.Blocks(title="Labelizer") as demo:
 
 
 
 
 
 
60
  dataset = gr.State()
61
  with gr.Sidebar():
62
  gr.Markdown("# 🖼️ Image Labeling Tool")
63
- gr.Markdown("Upload images and add labels to build your dataset.")
64
- upload_button = gr.UploadButton("Upload images", file_count="multiple")
65
- label_all = gr.Button("Labelize all images", interactive=False)
66
- is_labeling_in_progress = gr.State(False)
67
- progressbar = gr.Label("", visible=False, label="Preparing...")
68
- remove_all = gr.Button("Remove all", interactive=False)
 
 
 
 
 
 
 
 
69
 
70
  @gr.render(inputs=[dataset, is_labeling_in_progress])
71
- def render_grid(dataset, is_labeling_in_progress):
72
- if dataset is None:
 
73
  return
74
- cols = 6
75
- rows = len(dataset.images) // cols
76
- if len(dataset.images) > cols * rows:
77
- rows += 1
78
- current = 0
79
- for _ in range(rows):
80
- with gr.Row(equal_height=True):
81
- for _ in range(cols):
82
- with gr.Column(variant="compact", elem_classes=["label_image_box"]):
83
- if current >= len(dataset.images):
84
- break
85
- image = gr.Image(
86
- dataset.images[current]["path"],
87
- type="pil",
88
- container=False,
89
- sources=None,
90
- buttons=["fullscreen"],
91
- )
92
- label = gr.Text(
93
- dataset.images[current]["label"],
94
- placeholder="Description...",
95
- lines=5,
96
- container=False,
97
- interactive=not is_labeling_in_progress,
98
- )
99
- with gr.Row():
100
- button = gr.Button(
101
- "Generate label",
102
- interactive=not is_labeling_in_progress,
103
- )
104
-
105
- button.click(
106
- auto_label,
107
- inputs=[image],
108
- outputs=[image, label],
109
- )
110
-
111
- current += 1
112
-
113
- remove_all.click(lambda: ImageDataset(), inputs=None, outputs=dataset).then(
114
- update_buttons_states, inputs=dataset, outputs=[remove_all, label_all]
115
- )
116
 
117
- upload_button.upload(
118
- uploaded, inputs=[upload_button, dataset], outputs=dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ).then(
120
  update_buttons_states,
121
- inputs=dataset,
122
- outputs=[remove_all, label_all],
 
 
 
 
 
 
 
123
  )
 
 
124
  label_all.click(
125
- fn=lambda: (
126
- gr.update(interactive=False),
127
- gr.update(interactive=False),
128
- gr.update(visible=True),
129
- True,
130
- ),
131
- inputs=None,
132
  outputs=[
133
  upload_button,
134
  label_all,
135
  progressbar,
 
 
136
  is_labeling_in_progress,
137
  ],
138
  ).then(
@@ -140,20 +189,50 @@ with gr.Blocks(title="Labelizer") as demo:
140
  inputs=[dataset, progressbar],
141
  outputs=[dataset, progressbar],
142
  ).then(
143
- fn=lambda: (
144
- gr.update(interactive=True),
145
- gr.update(interactive=True),
146
- gr.update(visible=False),
147
- False,
148
- ),
149
- inputs=None,
150
  outputs=[
151
  upload_button,
152
  label_all,
153
  progressbar,
 
 
154
  is_labeling_in_progress,
155
  ],
156
  )
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  if __name__ == "__main__":
159
- demo.launch(css=CSS)
 
 
 
 
 
 
 
 
4
 
5
  from dataset import ImageDataset
6
  from labelizer import get_task_response
7
+ from packager import create_dataset_zip
8
+ from documentation import DOC_CONTENT
9
 
10
 
11
def auto_label(image, imid, dataset):
    """Caption a single image and record the caption in the dataset.

    Args:
        image: Image handed to ``get_task_response`` for captioning.
        imid: Identifier of the image whose label is being set.
        dataset: Object exposing ``update_label(image_id, text)``.

    Returns:
        A ``(caption, dataset)`` tuple, where the second item is whatever
        ``dataset.update_label`` returned for the new caption.
    """
    caption = get_task_response("<MORE_DETAILED_CAPTION>", image)
    return caption, dataset.update_label(imid, caption)
15
 
16
 
17
def label_changed(label, imid, dataset):
    """Store a label edited in the UI for the image identified by ``imid``.

    Args:
        label: New label text.
        imid: Identifier of the image the label belongs to.
        dataset: Object exposing ``update_label(image_id, text)``.

    Returns:
        Whatever ``dataset.update_label`` returns for this change.
    """
    updated_dataset = dataset.update_label(imid, label)
    return updated_dataset
19
+
20
+
21
def update_single_label(dataset, label_text, image_id):
    """Set the label of one image in the dataset.

    Args:
        dataset: Object exposing ``update_label(image_id, text)``.
        label_text: Label text to store.
        image_id: Identifier of the image to update.

    Returns:
        Whatever ``dataset.update_label`` returns for this change.
    """
    result = dataset.update_label(image_id, label_text)
    return result
24
 
25
 
26
  def uploaded(files, current_dataset):
 
36
  # Generate actual labels
37
  labels_dict = {}
38
  for imdata in progress.tqdm(dataset.images):
39
+ text = get_task_response("<MORE_DETAILED_CAPTION>", Image.open(imdata["path"])) # type: ignore
40
+ labels_dict[imdata["id"]] = text # type: ignore
 
41
 
42
  return dataset.update_all_labels(labels_dict), label
43
 
44
 
45
def download_dataset(dataset, organize_in_folders):
    """Build the dataset archive and return its path for download.

    Args:
        dataset: Dataset forwarded to ``create_dataset_zip``.
        organize_in_folders: Layout flag forwarded to ``create_dataset_zip``.

    Returns:
        The value returned by ``create_dataset_zip``.
    """
    return create_dataset_zip(dataset, organize_in_folders)
49
+
50
+
51
def update_buttons_states(dataset, labeling_in_progress=False):
    """Compute the sidebar control states for the current dataset.

    Args:
        dataset: Object whose ``images`` attribute supports ``len()``.
        labeling_in_progress: Whether a labeling run is currently active.

    Returns:
        A 6-tuple in this order: upload button update, "label all" button
        update, progress bar update, "remove all" button update, download
        button update, and the ``labeling_in_progress`` value itself.
    """
    has_images = len(dataset.images) > 0
    idle = not labeling_in_progress

    # Upload is only offered while the dataset is empty; every other action
    # needs at least one image. Nothing is interactive during labeling.
    can_upload = (not has_images) and idle
    can_act_on_images = has_images and idle

    upload_state = gr.update(interactive=can_upload)
    label_all_state = gr.update(interactive=can_act_on_images)
    progress_state = gr.update(visible=labeling_in_progress)
    remove_all_state = gr.update(interactive=can_act_on_images)
    download_state = gr.update(interactive=can_act_on_images)

    return (
        upload_state,
        label_all_state,
        progress_state,
        remove_all_state,
        download_state,
        labeling_in_progress,
    )
62
 
63
 
64
def start_labeling(dataset):
    """Return the control states to apply when a labeling run begins.

    Delegates to ``update_buttons_states`` with the in-progress flag set.
    """
    in_progress = True
    return update_buttons_states(dataset, labeling_in_progress=in_progress)
 
 
 
 
 
 
 
67
 
68
+
69
def finish_labeling(dataset):
    """Return the control states to apply once a labeling run has ended.

    Delegates to ``update_buttons_states`` with the in-progress flag cleared.
    """
    in_progress = False
    return update_buttons_states(dataset, labeling_in_progress=in_progress)
72
+
73
+
74
+ with gr.Blocks(title="Labelizer", fill_width=True) as demo:
75
  dataset = gr.State()
76
  with gr.Sidebar():
77
  gr.Markdown("# 🖼️ Image Labeling Tool")
78
+ with gr.Group():
79
+ gr.Markdown("Upload images and add labels to build your dataset.")
80
+
81
+ upload_button = gr.UploadButton("📁 Upload images", file_count="multiple")
82
+ label_all = gr.Button("🏷️ Labelize all images", interactive=False)
83
+ is_labeling_in_progress = gr.State(False)
84
+ progressbar = gr.Label("", visible=False, label="Preparing...")
85
+ remove_all = gr.Button("🗑️ Remove all", interactive=False)
86
+
87
+ with gr.Group():
88
+ download_button = gr.DownloadButton(
89
+ "💾 Download dataset", interactive=False, size="lg"
90
+ )
91
+ organize_files = gr.Checkbox(label="📂 Organize in folders", value=False)
92
 
93
  @gr.render(inputs=[dataset, is_labeling_in_progress])
94
+ def render_grid(ds, is_labeling_in_progress):
95
+ if not ds or len(ds.images) == 0:
96
+ gr.Markdown(DOC_CONTENT)
97
  return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Hidden component to trigger label refresh
100
+ gr.Number(value=0, visible=False)
101
+ with gr.Row(equal_height=True):
102
+ for im in ds.images:
103
+ with (
104
+ gr.Column(
105
+ elem_classes="label-image-box",
106
+ preserved_by_key=[
107
+ f"image_{im['id']}",
108
+ f"text_{im['id']}",
109
+ f"button_{im['id']}",
110
+ f"button_clicked_{im['id']}",
111
+ f"label_changed_{im['id']}",
112
+ ],
113
+ ),
114
+ ):
115
+ image = gr.Image(
116
+ im["path"],
117
+ type="pil",
118
+ container=False,
119
+ sources=None,
120
+ buttons=["fullscreen"],
121
+ height=300,
122
+ key=f"image_{im['id']}",
123
+ )
124
+
125
+ label = gr.Text(
126
+ placeholder="Description...",
127
+ lines=5,
128
+ container=False,
129
+ interactive=not is_labeling_in_progress,
130
+ key=f"text_{im['id']}",
131
+ )
132
+
133
+ # Hidden component to store current image ID
134
+ current_image_id = gr.State(value=im["id"])
135
+
136
+ button = gr.Button(
137
+ "✨ Generate label",
138
+ interactive=not is_labeling_in_progress,
139
+ key=f"button_{im['id']}",
140
+ )
141
+
142
+ button.click(
143
+ auto_label,
144
+ inputs=[image, current_image_id, dataset],
145
+ outputs=[label, dataset],
146
+ key=f"button_clicked_{im['id']}",
147
+ )
148
+
149
+ # Update dataset when label is changed
150
+ label.change(
151
+ label_changed,
152
+ inputs=[label, current_image_id, dataset],
153
+ outputs=[dataset],
154
+ key=f"label_changed_{im['id']}",
155
+ )
156
+
157
+ # Remove everything
158
+ remove_all.click(
159
+ lambda: ImageDataset(),
160
+ inputs=None,
161
+ outputs=dataset,
162
  ).then(
163
  update_buttons_states,
164
+ inputs=[dataset, is_labeling_in_progress],
165
+ outputs=[
166
+ upload_button,
167
+ label_all,
168
+ progressbar,
169
+ remove_all,
170
+ download_button,
171
+ is_labeling_in_progress,
172
+ ],
173
  )
174
+
175
+ # Label all images
176
  label_all.click(
177
+ fn=start_labeling,
178
+ inputs=[dataset],
 
 
 
 
 
179
  outputs=[
180
  upload_button,
181
  label_all,
182
  progressbar,
183
+ remove_all,
184
+ download_button,
185
  is_labeling_in_progress,
186
  ],
187
  ).then(
 
189
  inputs=[dataset, progressbar],
190
  outputs=[dataset, progressbar],
191
  ).then(
192
+ fn=finish_labeling,
193
+ inputs=[dataset],
 
 
 
 
 
194
  outputs=[
195
  upload_button,
196
  label_all,
197
  progressbar,
198
+ remove_all,
199
+ download_button,
200
  is_labeling_in_progress,
201
  ],
202
  )
203
 
204
+ # Upload images
205
+ upload_button.upload(
206
+ uploaded,
207
+ inputs=[upload_button, dataset],
208
+ outputs=dataset,
209
+ ).then(
210
+ update_buttons_states,
211
+ inputs=[dataset, is_labeling_in_progress],
212
+ outputs=[
213
+ upload_button,
214
+ label_all,
215
+ progressbar,
216
+ remove_all,
217
+ download_button,
218
+ is_labeling_in_progress,
219
+ ],
220
+ )
221
+
222
+ # Download zip file
223
+ download_button.click(
224
+ download_dataset,
225
+ inputs=[dataset, organize_files],
226
+ outputs=[download_button],
227
+ )
228
+
229
+
230
  if __name__ == "__main__":
231
+ CSS = """
232
+ .gr-group {
233
+ padding: .2rem;
234
+ }
235
+ .label-image-box {
236
+ }
237
+ """
238
+ demo.queue().launch(css=CSS)
dataset.py CHANGED
@@ -5,6 +5,8 @@ class ImageDataset:
5
  def add_images(self, files):
6
  """Return new instance with added images."""
7
  new_images = self.images.copy()
 
 
8
  for _, file in enumerate(files):
9
  new_images.append(
10
  {
 
5
  def add_images(self, files):
6
  """Return new instance with added images."""
7
  new_images = self.images.copy()
8
+ if files is None:
9
+ return self
10
  for _, file in enumerate(files):
11
  new_images.append(
12
  {
documentation.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DOC_CONTENT = """
2
+ # 🖼️ Image Labeling Tool - User Guide
3
+
4
+ ## 📋 Overview
5
+
6
+ This tool helps you create labeled image datasets quickly and efficiently using AI-powered automatic labeling. Perfect for machine learning projects, computer vision tasks, and dataset preparation.
7
+
8
+ ---
9
+
10
+ ## 🚀 Getting Started
11
+
12
+ ### Step 1: Upload Images
13
+ - Click **📁 Upload images** button
14
+ - Select multiple image files from your computer
15
+ - Supported formats: JPG, PNG, GIF, and other common image formats
16
+ - Images will appear in a grid layout
17
+
18
+ ### Step 2: Generate Labels
19
+ You have two options for labeling:
20
+
21
+ #### Option A: Individual Labeling
22
+ - Click **✨ Generate label** below any image
23
+ - AI will analyze the image and create a detailed description
24
+ - Edit the generated text if needed
25
+
26
+ #### Option B: Batch Labeling
27
+ - Click **🏷️ Labelize all images** button
28
+ - AI will process all images automatically
29
+ - Progress bar shows labeling status
30
+ - Images are labeled one after another until the whole dataset is done
31
+
32
+ ### Step 3: Review and Edit
33
+ - Labels appear as text boxes below each image
34
+ - Click on any text box to edit the description
35
+ - Make changes as needed for your specific use case
36
+
37
+ ### Step 4: Download Dataset
38
+ - Set the **📂 Organize in folders** checkbox in the sidebar
39
+ - Choose folder organization preference:
40
+ - ✅ **Organized**: Images in `images/` folder, labels in `labels/` folder
41
+ - ❌ **Flat**: All files in root directory
42
+ - Click **💾 Download dataset** to get your labeled dataset
43
+
44
+ ---
45
+
46
+ ## 🎯 Features
47
+
48
+ ### 🤖 AI-Powered Labeling
49
+ - Uses advanced Florence-2 model for accurate image descriptions
50
+ - Generates detailed, contextual descriptions
51
+ - Supports multiple description styles
52
+
53
+ ### 📊 Dataset Management
54
+ - Add/remove images easily
55
+ - Edit labels manually
56
+ - Real-time progress tracking
57
+ - Efficient batch processing
58
+
59
+ ### 📦 Flexible Export Options
60
+ - **Organized mode**: Perfect for ML frameworks expecting separate folders
61
+ - **Flat mode**: Ideal for simple file organization
62
+ - Automatic text file generation with matching names
63
+
64
+ ### 🎨 User-Friendly Interface
65
+ - Clean, intuitive design
66
+ - Visual progress indicators
67
+ - Responsive layout
68
+ - Emoji-enhanced navigation
69
+
70
+ ---
71
+
72
+ ## 💡 Tips & Best Practices
73
+
74
+ ### For Better Labels
75
+ - Use high-quality, clear images
76
+ - Ensure good lighting and focus
77
+ - Avoid blurry or low-resolution images
78
+ - Consider image diversity for training datasets
79
+
80
+ ### For Efficient Workflow
81
+ - Start with a small batch to test label quality
82
+ - Use batch processing for large datasets
83
+ - Review and edit labels for consistency
84
+ - Download frequently to save progress
85
+
86
+ ### For Dataset Quality
87
+ - Ensure consistent labeling style
88
+ - Add specific details relevant to your use case
89
+ - Remove irrelevant or poor-quality images
90
+ - Test your dataset with your target application
91
+
92
+ ---
93
+
94
+ ## 🔧 Technical Details
95
+
96
+ ### Supported Image Formats
97
+ - JPEG (.jpg, .jpeg)
98
+ - PNG (.png)
99
+ - GIF (.gif)
100
+ - BMP (.bmp)
101
+ - TIFF (.tiff, .tif)
102
+ - WebP (.webp)
103
+
104
+ ### Label Format
105
+ - Plain text files (.txt)
106
+ - UTF-8 encoding
107
+ - Same basename as corresponding image
108
+ - Example: `photo1.jpg` → `photo1.txt`
109
+
110
+ ### File Organization
111
+
112
+ #### Organized Mode
113
+ ```
114
+ dataset.zip
115
+ ├── images/
116
+ │ ├── photo1.jpg
117
+ │ ├── photo2.png
118
+ │ └── ...
119
+ └── labels/
120
+ ├── photo1.txt
121
+ ├── photo2.txt
122
+ └── ...
123
+ ```
124
+
125
+ #### Flat Mode
126
+ ```
127
+ dataset.zip
128
+ ├── photo1.jpg
129
+ ├── photo1.txt
130
+ ├── photo2.png
131
+ ├── photo2.txt
132
+ └── ...
133
+ ```
134
+
135
+ ---
136
+
137
+ ## 🎯 Use Cases
138
+
139
+ ### Machine Learning
140
+ - **Image Classification**: Create labeled datasets for training classifiers
141
+ - **Object Detection**: Generate descriptions for object detection tasks
142
+ - **Image Retrieval**: Build searchable image databases
143
+ - **Data Augmentation**: Create consistent label sets for augmented data
144
+
145
+ ### Content Management
146
+ - **Photo Archives**: Organize personal or professional image collections
147
+ - **E-commerce**: Generate product descriptions automatically
148
+ - **Social Media**: Create alt-text and captions for images
149
+ - **Digital Asset Management**: Tag and organize visual content
150
+
151
+ ### Research & Education
152
+ - **Academic Projects**: Prepare datasets for computer vision research
153
+ - **Teaching Materials**: Create labeled examples for students
154
+ - **Accessibility**: Generate descriptions for visually impaired users
155
+ - **Documentation**: Auto-generate figure descriptions
156
+
157
+ ---
158
+
159
+ ## ⚠️ Important Notes
160
+
161
+ ### Performance
162
+ - Processing time depends on image count and size
163
+ - Batch processing is more efficient than individual labeling
164
+ - Large datasets may take several minutes to process
165
+
166
+ ### Privacy
167
+ - Images are processed locally on your machine
168
+ - No data is sent to external servers during processing
169
+ - Downloaded datasets contain only your images and labels
170
+
171
+ ### Limitations
172
+ - Very large images (>10MB) may take longer to process
173
+ - Complex images with multiple objects may need manual refinement
174
+ - AI accuracy varies with image quality and complexity
175
+
176
+ ---
177
+
178
+ ## 🆘 Troubleshooting
179
+
180
+ ### Common Issues
181
+
182
+ **Images not uploading?**
183
+ - Check file format compatibility
184
+ - Ensure files aren't corrupted
185
+ - Try smaller batches first
186
+
187
+ **Labels seem inaccurate?**
188
+ - Improve image quality and lighting
189
+ - Edit labels manually after generation
190
+ - Use consistent image style for better results
191
+
192
+ **Download not working?**
193
+ - Ensure you have labeled images first
194
+ - Check available disk space
195
+ - Try both folder organization options
196
+
197
+ **Performance slow?**
198
+ - Close other applications
199
+ - Use smaller image batches
200
+ - Consider image size optimization
201
+
202
+ ### Getting Help
203
+ - Check image formats and sizes
204
+ - Ensure stable internet connection for model loading
205
+ - Restart the application if issues persist
206
+
207
+ ---
208
+
209
+ ## 🎉 Ready to Start!
210
+
211
+ 1. **Upload** your images using the 📁 button
212
+ 2. **Generate** labels individually or in batch
213
+ 3. **Review** and edit as needed
214
+ 4. **Download** your labeled dataset
215
+
216
+ Happy labeling! 🚀
217
+ """
packager.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import zipfile
3
+ from typing import Optional
4
+ from pathlib import Path
5
+
6
+ from dataset import ImageDataset
7
+
8
+
9
def create_dataset_zip(
    dataset: "ImageDataset", organize_in_folders: bool = True
) -> Optional[str]:
    """Create a zip file containing images and their text labels.

    Each image is stored under its own file name, next to a UTF-8 text file
    with the same base name holding its label (``photo1.jpg`` ->
    ``photo1.txt``).

    Args:
        dataset: ImageDataset whose ``images`` entries are dicts with a
            ``"path"`` and a ``"label"`` key.
        organize_in_folders: If True, puts images in 'images/' folder and
            texts in 'labels/' folder. If False, puts all files in the root
            folder.

    Returns:
        Path to the created zip file, or None if the dataset is empty or the
        archive could not be created.
    """
    if not dataset.images:
        return None

    # delete=False: the path is returned to the caller, so the file has to
    # outlive this function.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    temp_path = temp_file.name
    temp_file.close()

    if organize_in_folders:
        image_prefix, label_prefix = "images/", "labels/"
    else:
        image_prefix, label_prefix = "", ""

    try:
        with zipfile.ZipFile(temp_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for image_data in dataset.images:
                image_path = image_data["path"]
                label = image_data["label"]

                try:
                    source = Path(image_path)
                    # Prepare both payloads before touching the archive so a
                    # failure never leaves an image without its label file.
                    # A missing label is exported as an empty text file.
                    label_bytes = (label or "").encode("utf-8")
                    image_bytes = source.read_bytes()

                    # NOTE(review): two images sharing a base name would end
                    # up as duplicate entries in the archive - confirm that
                    # upload paths always have distinct file names.
                    zip_file.writestr(f"{image_prefix}{source.name}", image_bytes)
                    zip_file.writestr(
                        f"{label_prefix}{source.stem}.txt", label_bytes
                    )
                except FileNotFoundError:
                    # Skip if image file doesn't exist
                    continue
                except Exception as e:
                    # Log error but continue with other files
                    print(f"Error processing {image_path}: {e}")
                    continue

        return temp_path

    except Exception as e:
        # Clean up temp file if creation failed
        Path(temp_path).unlink(missing_ok=True)
        print(f"Error creating zip file: {e}")
        return None