Spaces:

gokaygokay
/

SD3.5-with-Captioner

Running on Zero

gokaygokay committed on Oct 23, 2024

Commit

6cfd7ba

verified ·

1 Parent(s): b8d6ed2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import random
 import numpy as np
 import os
 import subprocess
 # Install flash-attn
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
@@ -31,6 +32,13 @@ enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchan
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 # Florence caption function
 @spaces.GPU
 def florence_caption(image):
@@ -61,7 +69,7 @@ def enhance_prompt(input_prompt):
     enhanced_text = result[0]['summary_text']
     return enhanced_text
-@spaces.GPU(duration=190)
 def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, negative_prompt="", progress=gr.Progress(track_tqdm=True)):
     if image is not None:
         # Convert image to PIL if it's not already
@@ -125,7 +133,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondar
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group(elem_classes="input-group"):
-                input_image = gr.Image(label="Input Image (Florence-2 Captioner)")
             with gr.Accordion("Advanced Settings", open=False):
                 text_prompt = gr.Textbox(label="Text Prompt (optional, used if no image is uploaded)")

 import numpy as np
 import os
 import subprocess
+from huggingface_hub import hf_hub_download
 # Install flash-attn
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
+hf_hub_download(
+    repo_id="stabilityai/stable-diffusion-3.5-large-turbo",
+    filename="LICENSE.md",
+    local_dir = "./models",
+    token = huggingface_token
+)
 # Florence caption function
 @spaces.GPU
 def florence_caption(image):
     enhanced_text = result[0]['summary_text']
     return enhanced_text
+@spaces.GPU(duration=60)
 def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, negative_prompt="", progress=gr.Progress(track_tqdm=True)):
     if image is not None:
         # Convert image to PIL if it's not already
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group(elem_classes="input-group"):
+                input_image = gr.Image(label="Input Image (Florence-2 Captioner)", height=512)
             with gr.Accordion("Advanced Settings", open=False):
                 text_prompt = gr.Textbox(label="Text Prompt (optional, used if no image is uploaded)")