Spaces:

John6666
/

Prompt-Enhancer

Paused

App Files Files Community

John6666 committed on Jul 13, 2024

Commit

1ffbc6e

verified ·

1 Parent(s): eb0e639

Upload 2 files

Browse files

Files changed (2) hide show

app.py +21 -18
fl2basepromptgen.py +76 -0

app.py CHANGED Viewed

@@ -24,6 +24,9 @@ from tagger import (
 from fl2sd3longcap import (
     predict_tags_fl2_sd3,
 )
 from promptenhancer import prompt_enhancer
@@ -38,7 +41,8 @@ def description_ui():
 - Models: p1atdev's [wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf),\
  gokaygokay's [Florence-2-SD3-Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner),\
  [Lamini-Prompt-Enchance](https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance),\
- [Lamini-Prompt-Enchance-Long](https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance-Long).
 """
     )
@@ -58,7 +62,7 @@ def main():
                         input_tag_type = gr.Radio(label="Convert tags to", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="danbooru")
                         recom_prompt = gr.Radio(label="Insert reccomended prompt", choices=["None", "Animagine", "Pony"], value="None", interactive=True)
                         keep_tags = gr.Radio(label="Remove tags leaving only the following", choices=["body", "dress", "all"], value="all")
-                    image_algorithms = gr.CheckboxGroup(["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner"], label="Algorithms", value=["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner"])
                     generate_from_image_btn = gr.Button(value="GENERATE TAGS FROM IMAGE", size="lg", variant="primary")
                 with gr.Group():
@@ -93,31 +97,30 @@ def main():
                     output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
                     copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
-        translate_input_prompt_button.click(translate_prompt, [input_general], [input_general])
-        translate_input_prompt_button.click(translate_prompt, [input_character], [input_character])
-        translate_input_prompt_button.click(translate_prompt, [input_copyright], [input_copyright])
         generate_from_image_btn.click(
             predict_tags_wd,
             [input_image, input_general, image_algorithms, general_threshold, character_threshold],
-            [
-                input_copyright,
-                input_character,
-                input_general,
-                copy_input_btn,
-            ],
         ).success(
             predict_tags_fl2_sd3,
             [input_image, input_general, image_algorithms],
             [input_general],
         ).success(
-            remove_specific_prompt, [input_general, keep_tags], [input_general],
         ).success(
-            convert_danbooru_to_e621_prompt, [input_general, input_tag_type], [input_general],
         ).success(
-            insert_recom_prompt, [input_general, dummy_np, recom_prompt], [input_general, dummy_np],
         )
-        copy_input_btn.click(compose_prompt_to_copy, [input_character, input_copyright, input_general], [input_tags_to_copy]).success(
             gradio_copy_text, [input_tags_to_copy], js=COPY_ACTION_JS,
         )
@@ -126,11 +129,11 @@ def main():
             [input_character, input_copyright, input_general, prompt_enhancer_model],
             [output_text, copy_btn, copy_btn_pony],
         ).success(
-            convert_danbooru_to_e621_prompt, [output_text, tag_type], [output_text_pony],
         ).success(
-            insert_recom_prompt, [output_text, dummy_np, recom_animagine], [output_text, dummy_np],
         ).success(
-            insert_recom_prompt, [output_text_pony, dummy_np, recom_pony], [output_text_pony, dummy_np],
         )
         copy_btn.click(gradio_copy_text, [output_text], js=COPY_ACTION_JS)
         copy_btn_pony.click(gradio_copy_text, [output_text_pony], js=COPY_ACTION_JS)

 from fl2sd3longcap import (
     predict_tags_fl2_sd3,
 )
+from fl2basepromptgen import (
+    predict_tags_fl2_base_prompt_gen,
+)
 from promptenhancer import prompt_enhancer
 - Models: p1atdev's [wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf),\
  gokaygokay's [Florence-2-SD3-Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner),\
  [Lamini-Prompt-Enchance](https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance),\
+ [Lamini-Prompt-Enchance-Long](https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance-Long),\
+ MiaoshouAI's [Florence-2-base-PromptGen](https://huggingface.co/MiaoshouAI/Florence-2-base-PromptGen).
 """
     )
                         input_tag_type = gr.Radio(label="Convert tags to", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="danbooru")
                         recom_prompt = gr.Radio(label="Insert reccomended prompt", choices=["None", "Animagine", "Pony"], value="None", interactive=True)
                         keep_tags = gr.Radio(label="Remove tags leaving only the following", choices=["body", "dress", "all"], value="all")
+                    image_algorithms = gr.CheckboxGroup(["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner", "Use Florence-2-base-PromptGen"], label="Algorithms", value=["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner"])
                     generate_from_image_btn = gr.Button(value="GENERATE TAGS FROM IMAGE", size="lg", variant="primary")
                 with gr.Group():
                     output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
                     copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+        translate_input_prompt_button.click(translate_prompt, [input_general], [input_general], queue=False)
+        translate_input_prompt_button.click(translate_prompt, [input_character], [input_character], queue=False)
+        translate_input_prompt_button.click(translate_prompt, [input_copyright], [input_copyright], queue=False)
         generate_from_image_btn.click(
             predict_tags_wd,
             [input_image, input_general, image_algorithms, general_threshold, character_threshold],
+            [input_copyright, input_character, input_general, copy_input_btn],
         ).success(
             predict_tags_fl2_sd3,
             [input_image, input_general, image_algorithms],
             [input_general],
         ).success(
+            predict_tags_fl2_base_prompt_gen,
+            [input_image, input_general, image_algorithms],
+            [input_general],
+        ).success(
+            remove_specific_prompt, [input_general, keep_tags], [input_general], queue=False,
         ).success(
+            convert_danbooru_to_e621_prompt, [input_general, input_tag_type], [input_general], queue=False,
         ).success(
+            insert_recom_prompt, [input_general, dummy_np, recom_prompt], [input_general, dummy_np], queue=False,
         )
+        copy_input_btn.click(compose_prompt_to_copy, [input_character, input_copyright, input_general], [input_tags_to_copy], queue=False).success(
             gradio_copy_text, [input_tags_to_copy], js=COPY_ACTION_JS,
         )
             [input_character, input_copyright, input_general, prompt_enhancer_model],
             [output_text, copy_btn, copy_btn_pony],
         ).success(
+            convert_danbooru_to_e621_prompt, [output_text, tag_type], [output_text_pony], queue=False,
         ).success(
+            insert_recom_prompt, [output_text, dummy_np, recom_animagine], [output_text, dummy_np], queue=False,
         ).success(
+            insert_recom_prompt, [output_text_pony, dummy_np, recom_pony], [output_text_pony, dummy_np], queue=False,
         )
         copy_btn.click(gradio_copy_text, [output_text], js=COPY_ACTION_JS)
         copy_btn_pony.click(gradio_copy_text, [output_text_pony], js=COPY_ACTION_JS)

fl2basepromptgen.py ADDED Viewed

	@@ -0,0 +1,76 @@

+from transformers import AutoProcessor, AutoModelForCausalLM
+import spaces
+import re
+from PIL import Image
+import subprocess
+# Install flash-attn with pip when this module is imported.
+# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is passed to the install, presumably to skip compiling the CUDA extension — TODO confirm.
+# NOTE(review): env= replaces the child's entire environment (PATH etc. are not inherited), and the
+# return code of the install is not checked — confirm both are intended.
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# Load the Florence-2-base-PromptGen model (switched to eval mode) and its processor once, at import time.
+# trust_remote_code=True allows code shipped in the model repository to be executed.
+fl_model = AutoModelForCausalLM.from_pretrained('MiaoshouAI/Florence-2-base-PromptGen', trust_remote_code=True).eval()
+fl_processor = AutoProcessor.from_pretrained('MiaoshouAI/Florence-2-base-PromptGen', trust_remote_code=True)
+def fl_modify_caption(caption: str) -> str:
+    """
+    Removes specific prefixes from captions if present, otherwise returns the original caption.
+    Args:
+        caption (str): A string containing a caption.
+    Returns:
+        str: The caption with the prefix removed if it was present, or the original caption.
+    """
+    # Define the prefixes to remove
+    prefix_substrings = [
+        ('captured from ', ''),
+        ('captured at ', '')
+    ]
+    # Create a regex pattern to match any of the prefixes
+    pattern = '|'.join([re.escape(opening) for opening, _ in prefix_substrings])
+    replacers = {opening.lower(): replacer for opening, replacer in prefix_substrings}
+    # Function to replace matched prefix with its corresponding replacement
+    def replace_fn(match):
+        return replacers[match.group(0).lower()]
+    # Apply the regex to the caption
+    modified_caption = re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
+    # If the caption was modified, return the modified version; otherwise, return the original
+    return modified_caption if modified_caption != caption else caption
+@spaces.GPU
+def fl_run_example(image):
+    task_prompt = "<GENERATE_PROMPT>"
+    prompt = task_prompt + "Describe this image in great detail."
+    # Ensure the image is in RGB mode
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    inputs = fl_processor(text=prompt, images=image, return_tensors="pt")
+    generated_ids = fl_model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        do_sample=False,
+        num_beams=3
+    )
+    generated_text = fl_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = fl_processor.post_process_generation(generated_text, task=prompt, image_size=(image.width, image.height))
+    return parsed_answer["GENERATE_PROMPT>"]
+    #return fl_modify_caption(parsed_answer["GENERATE_PROMPT>"])
+def predict_tags_fl2_base_prompt_gen(image: Image.Image, input_tags: str, algo: list[str]):
+    def to_list(s):
+        return [x.strip() for x in s.split(",") if not s == ""]
+    def list_uniq(l):
+        return sorted(set(l), key=l.index)
+    if not "Use Florence-2-base-PromptGen" in algo:
+        return input_tags
+    tag_list = list_uniq(to_list(input_tags) + to_list(fl_run_example(image) + ", "))
+    tag_list.remove("")
+    return ", ".join(tag_list)