LPX55 committed on
Commit
63f5ee9
·
verified ·
1 Parent(s): 54c487a

Update app_temp.py

Browse files
Files changed (1) hide show
  1. app_temp.py +86 -40
app_temp.py CHANGED
@@ -18,21 +18,18 @@ import os
18
  import base64
19
  import json
20
 
 
21
  SYSTEM_PROMPT = '''
22
  # Edit Instruction Rewriter
23
  You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
24
-
25
  Please strictly follow the rewriting rules below:
26
-
27
  ## 1. General Principles
28
  - Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
29
  - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
30
  - Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
31
  - All added objects or modifications must align with the logic and style of the scene in the input images.
32
  - If multiple sub-images are to be generated, describe the content of each sub-image individually.
33
-
34
  ## 2. Task-Type Handling Rules
35
-
36
  ### 1. Add, Delete, Replace Tasks
37
  - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
38
  - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
@@ -40,7 +37,6 @@ Please strictly follow the rewriting rules below:
40
  > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
41
  - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
42
  - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
43
-
44
  ### 2. Text Editing Tasks
45
  - All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
46
  - Both adding new text and replacing existing text are text replacement tasks, For example:
@@ -49,14 +45,12 @@ Please strictly follow the rewriting rules below:
49
  - Replace the visual object to "yy"
50
  - Specify text position, color, and layout only if user has required.
51
  - If font is specified, keep the original language of the font.
52
-
53
  ### 3. Human Editing Tasks
54
  - Make the smallest changes to the given user's prompt.
55
  - If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
56
- - **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subjects identity consistency.**
57
  > Original: "Add eyebrows to the face"
58
- > Rewritten: "Slightly thicken the persons eyebrows with little change, look natural."
59
-
60
  ### 4. Style Conversion or Enhancement Tasks
61
  - If a style is specified, describe it concisely using key visual features. For example:
62
  > Original: "Disco style"
@@ -67,12 +61,10 @@ Please strictly follow the rewriting rules below:
67
  - Clearly specify the object to be modified. For example:
68
  > Original: Modify the subject in Picture 1 to match the style of Picture 2.
69
  > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
70
-
71
  ### 5. Material Replacement
72
  - Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
73
  - For text material replacement, use the fixed template:
74
  "Change the material of text "xxxx" to laser style"
75
-
76
  ### 6. Logo/Pattern Editing
77
  - Material replacement should preserve the original shape and structure as much as possible. For example:
78
  > Original: "Convert to sapphire material"
@@ -80,55 +72,96 @@ Please strictly follow the rewriting rules below:
80
  - When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
81
  > Original: "Migrate the logo in the image to a new scene"
82
  > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
83
-
84
  ### 7. Multi-Image Tasks
85
- - Rewritten prompts must clearly point out which images element is being modified. For example:
86
  > Original: "Replace the subject of picture 1 with the subject of picture 2"
87
- > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2s background unchanged"
88
- - For stylization tasks, describe the reference images style in the rewritten prompt, while preserving the visual content of the source image.
89
-
90
  ## 3. Rationale and Logic Check
91
- - Resolve contradictory instructions: e.g., Remove all trees but keep all trees requires logical correction.
92
  - Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
93
-
94
  # Output Format Example
95
  ```json
96
  {
97
  "Rewritten": "..."
98
  }
99
  '''
100
- # --- Prompt Enhancement using Hugging Face InferenceClient ---
101
- def polish_prompt_hf(prompt, img_list):
102
  """
103
  Rewrites the prompt using a Hugging Face InferenceClient.
 
104
  """
105
  # Ensure HF_TOKEN is set
106
  api_key = os.environ.get("HF_TOKEN")
107
  if not api_key:
108
  print("Warning: HF_TOKEN not set. Falling back to original prompt.")
109
- return prompt
110
-
 
111
  try:
112
  # Initialize the client
113
- prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
114
- # Initialize the client
115
  client = InferenceClient(
116
- provider="novita",
117
  api_key=api_key,
118
  )
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # Format the messages for the chat completions API
121
- sys_promot = "you are a helpful assistant, you should provide useful answers to users."
122
  messages = [
123
- {"role": "system", "content": sys_promot},
124
- {"role": "user", "content": []}]
125
- for img in img_list:
126
- messages[1]["content"].append(
127
- {"image": f"data:image/png;base64,{encode_image(img)}"})
128
- messages[1]["content"].append({"text": f"{prompt}"})
129
-
 
130
  completion = client.chat.completions.create(
131
- model="Qwen/Qwen3-Next-80B-A3B-Instruct",
132
  messages=messages,
133
  )
134
 
@@ -136,7 +169,7 @@ def polish_prompt_hf(prompt, img_list):
136
  result = completion.choices[0].message.content
137
 
138
  # Try to extract JSON if present
139
- if '{"Rewritten"' in result:
140
  try:
141
  # Clean up the response
142
  result = result.replace('```json', '').replace('```', '')
@@ -153,9 +186,7 @@ def polish_prompt_hf(prompt, img_list):
153
  except Exception as e:
154
  print(f"Error during API call to Hugging Face: {e}")
155
  # Fallback to original prompt if enhancement fails
156
- return prompt
157
-
158
-
159
 
160
  def encode_image(pil_image):
161
  import io
@@ -208,6 +239,12 @@ optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB",
208
  # --- UI Constants and Helpers ---
209
  MAX_SEED = np.iinfo(np.int32).max
210
 
 
 
 
 
 
 
211
  # --- Main Inference Function (with hardcoded negative prompt) ---
212
  @spaces.GPU(duration=40)
213
  def infer(
@@ -220,7 +257,7 @@ def infer(
220
  height=None,
221
  width=None,
222
  rewrite_prompt=True,
223
- num_images_per_prompt=1,
224
  progress=gr.Progress(track_tqdm=True),
225
  ):
226
  """
@@ -368,9 +405,17 @@ with gr.Blocks(css=css) as demo:
368
  step=8,
369
  value=None,
370
  )
 
 
 
 
 
 
 
 
371
 
372
 
373
- rewrite_prompt = gr.Checkbox(label="Rewrite prompt (being fixed)", value=False)
374
 
375
  # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
376
 
@@ -387,6 +432,7 @@ with gr.Blocks(css=css) as demo:
387
  height,
388
  width,
389
  rewrite_prompt,
 
390
  ],
391
  outputs=[result, seed],
392
  )
 
18
  import base64
19
  import json
20
 
21
+
22
  SYSTEM_PROMPT = '''
23
  # Edit Instruction Rewriter
24
  You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
 
25
  Please strictly follow the rewriting rules below:
 
26
  ## 1. General Principles
27
  - Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
28
  - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
29
  - Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
30
  - All added objects or modifications must align with the logic and style of the scene in the input images.
31
  - If multiple sub-images are to be generated, describe the content of each sub-image individually.
 
32
  ## 2. Task-Type Handling Rules
 
33
  ### 1. Add, Delete, Replace Tasks
34
  - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
35
  - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
 
37
  > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
38
  - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
39
  - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
 
40
  ### 2. Text Editing Tasks
41
  - All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
42
  - Both adding new text and replacing existing text are text replacement tasks, For example:
 
45
  - Replace the visual object to "yy"
46
  - Specify text position, color, and layout only if user has required.
47
  - If font is specified, keep the original language of the font.
 
48
  ### 3. Human Editing Tasks
49
  - Make the smallest changes to the given user's prompt.
50
  - If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
51
+ - **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subject's identity consistency.**
52
  > Original: "Add eyebrows to the face"
53
+ > Rewritten: "Slightly thicken the person's eyebrows with little change, look natural."
 
54
  ### 4. Style Conversion or Enhancement Tasks
55
  - If a style is specified, describe it concisely using key visual features. For example:
56
  > Original: "Disco style"
 
61
  - Clearly specify the object to be modified. For example:
62
  > Original: Modify the subject in Picture 1 to match the style of Picture 2.
63
  > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
 
64
  ### 5. Material Replacement
65
  - Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
66
  - For text material replacement, use the fixed template:
67
  "Change the material of text "xxxx" to laser style"
 
68
  ### 6. Logo/Pattern Editing
69
  - Material replacement should preserve the original shape and structure as much as possible. For example:
70
  > Original: "Convert to sapphire material"
 
72
  - When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
73
  > Original: "Migrate the logo in the image to a new scene"
74
  > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
 
75
  ### 7. Multi-Image Tasks
76
+ - Rewritten prompts must clearly point out which image's element is being modified. For example:
77
  > Original: "Replace the subject of picture 1 with the subject of picture 2"
78
+ > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2's background unchanged"
79
+ - For stylization tasks, describe the reference image's style in the rewritten prompt, while preserving the visual content of the source image.
 
80
  ## 3. Rationale and Logic Check
81
+ - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" requires logical correction.
82
  - Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
 
83
  # Output Format Example
84
  ```json
85
  {
86
  "Rewritten": "..."
87
  }
88
  '''
89
+
90
+ def polish_prompt_hf(original_prompt, img_list):
91
  """
92
  Rewrites the prompt using a Hugging Face InferenceClient.
93
+ Supports multiple images via img_list.
94
  """
95
  # Ensure HF_TOKEN is set
96
  api_key = os.environ.get("HF_TOKEN")
97
  if not api_key:
98
  print("Warning: HF_TOKEN not set. Falling back to original prompt.")
99
+ return original_prompt
100
+ prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {original_prompt}\n\nRewritten Prompt:"
101
+ system_prompt = "you are a helpful assistant, you should provide useful answers to users."
102
  try:
103
  # Initialize the client
 
 
104
  client = InferenceClient(
105
+ provider="nebius",
106
  api_key=api_key,
107
  )
108
 
109
+ # Convert list of images to base64 data URLs
110
+ image_urls = []
111
+ if img_list is not None:
112
+ # Ensure img_list is actually a list
113
+ if not isinstance(img_list, list):
114
+ img_list = [img_list]
115
+
116
+ for img in img_list:
117
+ image_url = None
118
+ # If img is a PIL Image
119
+ if hasattr(img, 'save'): # Check if it's a PIL Image
120
+ buffered = BytesIO()
121
+ img.save(buffered, format="PNG")
122
+ img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
123
+ image_url = f"data:image/png;base64,{img_base64}"
124
+ # If img is already a file path (string)
125
+ elif isinstance(img, str):
126
+ with open(img, "rb") as image_file:
127
+ img_base64 = base64.b64encode(image_file.read()).decode('utf-8')
128
+ image_url = f"data:image/png;base64,{img_base64}"
129
+ else:
130
+ print(f"Warning: Unexpected image type: {type(img)}, skipping...")
131
+ continue
132
+
133
+ if image_url:
134
+ image_urls.append(image_url)
135
+
136
+ # Build the content array with text first, then all images
137
+ content = [
138
+ {
139
+ "type": "text",
140
+ "text": prompt
141
+ }
142
+ ]
143
+
144
+ # Add all images to the content
145
+ for image_url in image_urls:
146
+ content.append({
147
+ "type": "image_url",
148
+ "image_url": {
149
+ "url": image_url
150
+ }
151
+ })
152
+
153
  # Format the messages for the chat completions API
 
154
  messages = [
155
+ {"role": "system", "content": system_prompt},
156
+ {
157
+ "role": "user",
158
+ "content": content
159
+ }
160
+ ]
161
+
162
+ # Call the API
163
  completion = client.chat.completions.create(
164
+ model="Qwen/Qwen2.5-VL-72B-Instruct",
165
  messages=messages,
166
  )
167
 
 
169
  result = completion.choices[0].message.content
170
 
171
  # Try to extract JSON if present
172
+ if '"Rewritten"' in result:
173
  try:
174
  # Clean up the response
175
  result = result.replace('```json', '').replace('```', '')
 
186
  except Exception as e:
187
  print(f"Error during API call to Hugging Face: {e}")
188
  # Fallback to original prompt if enhancement fails
189
+ return original_prompt
 
 
190
 
191
  def encode_image(pil_image):
192
  import io
 
239
  # --- UI Constants and Helpers ---
240
  MAX_SEED = np.iinfo(np.int32).max
241
 
242
+ def use_output_as_input(output_images):
243
+ """Convert output images to input format for the gallery"""
244
+ if output_images is None or len(output_images) == 0:
245
+ return []
246
+ return output_images
247
+
248
  # --- Main Inference Function (with hardcoded negative prompt) ---
249
  @spaces.GPU(duration=40)
250
  def infer(
 
257
  height=None,
258
  width=None,
259
  rewrite_prompt=True,
260
+ num_images_per_prompt=2,
261
  progress=gr.Progress(track_tqdm=True),
262
  ):
263
  """
 
405
  step=8,
406
  value=None,
407
  )
408
+
409
+ num_images = gr.Slider(
410
+ label="Num Images per Prompt",
411
+ minimum=1,
412
+ maximum=4,
413
+ step=1,
414
+ value=2,
415
+ )
416
 
417
 
418
+ rewrite_prompt = gr.Checkbox(label="Rewrite prompt (being fixed)", value=False)
419
 
420
  # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
421
 
 
432
  height,
433
  width,
434
  rewrite_prompt,
435
+ num_images
436
  ],
437
  outputs=[result, seed],
438
  )