Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Commit 
							
							·
						
						9c8e948
	
1
								Parent(s):
							
							f93e9ec
								
token
Browse files
- app.py +135 -158
- requirements.txt +11 -4
    	
        app.py
    CHANGED
    
    | @@ -1,181 +1,158 @@ | |
| 1 | 
             
            import spaces
         | 
| 2 | 
            -
            import json
         | 
| 3 | 
            -
            import subprocess
         | 
| 4 | 
            -
            from llama_cpp import Llama
         | 
| 5 | 
            -
            from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
         | 
| 6 | 
            -
            from llama_cpp_agent.providers import LlamaCppPythonProvider
         | 
| 7 | 
            -
            from llama_cpp_agent.chat_history import BasicChatHistory
         | 
| 8 | 
            -
            from llama_cpp_agent.chat_history.messages import Roles
         | 
| 9 | 
             
            import gradio as gr
         | 
| 10 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 11 |  | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 |  | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
                local_dir = "./models"
         | 
| 19 | 
            -
            )
         | 
| 20 |  | 
| 21 | 
            -
             | 
| 22 | 
            -
                repo_id="jhofseth/Reflection-Llama-3.1-70B-GGUF",
         | 
| 23 | 
            -
                filename="Reflection-Llama-3.1-70B-IQ3_XXS.gguf",
         | 
| 24 | 
            -
                local_dir = "./models"
         | 
| 25 | 
            -
            )
         | 
| 26 |  | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
                filename="Reflection-Llama-3.1-70B.imatrix",
         | 
| 30 | 
            -
                local_dir = "./random"
         | 
| 31 | 
            -
            )
         | 
| 32 |  | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
                else:
         | 
| 37 | 
            -
                    raise ValueError(f"Unsupported model: {model_name}")
         | 
| 38 |  | 
|  | |
|  | |
| 39 |  | 
|  | |
|  | |
|  | |
|  | |
| 40 | 
             
            @spaces.GPU
         | 
| 41 | 
            -
            def  | 
| 42 | 
            -
                 | 
| 43 | 
            -
                 | 
| 44 | 
            -
             | 
| 45 | 
            -
                system_message,
         | 
| 46 | 
            -
                max_tokens,
         | 
| 47 | 
            -
                temperature,
         | 
| 48 | 
            -
                top_p,
         | 
| 49 | 
            -
                top_k,
         | 
| 50 | 
            -
                repeat_penalty,
         | 
| 51 | 
            -
            ):
         | 
| 52 | 
            -
                global llm
         | 
| 53 | 
            -
                global llm_model
         | 
| 54 | 
            -
                
         | 
| 55 | 
            -
                chat_template = get_messages_formatter_type(model)
         | 
| 56 |  | 
| 57 | 
            -
                 | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
                     | 
| 65 | 
            -
                    llm_model = model
         | 
| 66 | 
            -
                
         | 
| 67 | 
            -
                provider = LlamaCppPythonProvider(llm)
         | 
| 68 | 
            -
             | 
| 69 | 
            -
                agent = LlamaCppAgent(
         | 
| 70 | 
            -
                    provider,
         | 
| 71 | 
            -
                    system_prompt=f"{system_message}",
         | 
| 72 | 
            -
                    predefined_messages_formatter_type=chat_template,
         | 
| 73 | 
            -
                    debug_output=True
         | 
| 74 | 
             
                )
         | 
| 75 | 
            -
                
         | 
| 76 | 
            -
                 | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            -
                 | 
| 81 | 
            -
                 | 
| 82 | 
            -
                settings.stream = True
         | 
| 83 |  | 
| 84 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 85 |  | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 90 | 
            -
                     | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
                     | 
| 95 | 
            -
             | 
| 96 | 
            -
                     | 
| 97 |  | 
| 98 | 
            -
                 | 
| 99 | 
            -
                     | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
                     | 
| 103 | 
            -
             | 
| 104 | 
            -
                )
         | 
| 105 |  | 
| 106 | 
            -
                 | 
| 107 | 
            -
             | 
| 108 | 
            -
                     | 
| 109 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 110 |  | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 115 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 116 | 
             
            </center></p>
         | 
| 117 | 
             
            """
         | 
| 118 |  | 
| 119 | 
            -
             | 
| 120 | 
            -
                 | 
| 121 | 
            -
                 | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
                         | 
| 127 | 
            -
                         | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
                         | 
| 138 | 
            -
             | 
| 139 | 
            -
                     | 
| 140 | 
            -
             | 
| 141 | 
            -
                         | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
                         | 
| 150 | 
            -
                         | 
| 151 | 
            -
             | 
| 152 | 
            -
                     | 
| 153 | 
            -
                ],
         | 
| 154 | 
            -
                theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
         | 
| 155 | 
            -
                    body_background_fill_dark="#16141c",
         | 
| 156 | 
            -
                    block_background_fill_dark="#16141c",
         | 
| 157 | 
            -
                    block_border_width="1px",
         | 
| 158 | 
            -
                    block_title_background_fill_dark="#1e1c26",
         | 
| 159 | 
            -
                    input_background_fill_dark="#292733",
         | 
| 160 | 
            -
                    button_secondary_background_fill_dark="#24212b",
         | 
| 161 | 
            -
                    border_color_accent_dark="#343140",
         | 
| 162 | 
            -
                    border_color_primary_dark="#343140",
         | 
| 163 | 
            -
                    background_fill_secondary_dark="#16141c",
         | 
| 164 | 
            -
                    color_accent_soft_dark="transparent",
         | 
| 165 | 
            -
                    code_background_fill_dark="#292733",
         | 
| 166 | 
            -
                ),
         | 
| 167 | 
            -
                retry_btn="Retry",
         | 
| 168 | 
            -
                undo_btn="Undo",
         | 
| 169 | 
            -
                clear_btn="Clear",
         | 
| 170 | 
            -
                submit_btn="Send",
         | 
| 171 | 
            -
                title="Reflection Llama-3.1 70B",
         | 
| 172 | 
            -
                description=description,
         | 
| 173 | 
            -
                chatbot=gr.Chatbot(
         | 
| 174 | 
            -
                    scale=1, 
         | 
| 175 | 
            -
                    likeable=False,
         | 
| 176 | 
            -
                    show_copy_button=True
         | 
| 177 | 
             
                )
         | 
| 178 | 
            -
            )
         | 
| 179 |  | 
| 180 | 
            -
             | 
| 181 | 
            -
                demo.launch()
         | 
|  | |
| 1 | 
             
            import spaces
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 | 
             
            import gradio as gr
         | 
| 3 | 
            +
            import torch
         | 
| 4 | 
            +
            from PIL import Image
         | 
| 5 | 
            +
            from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
         | 
| 6 | 
            +
            from diffusers import DiffusionPipeline
         | 
| 7 | 
            +
            import random
         | 
| 8 | 
            +
            import numpy as np
         | 
| 9 | 
            +
            import os
         | 
| 10 | 
            +
            import subprocess
         | 
| 11 |  | 
| 12 | 
            +
            # Install flash-attn at startup, with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE
            # set for the pip subprocess.
            # The flag is merged into a copy of the current environment: passing a
            # bare dict as `env` replaces the whole environment of the child process,
            # which drops PATH, HOME and any pip/proxy settings it relies on.
            subprocess.run(
                'pip install flash-attn --no-build-isolation',
                env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
                shell=True,
            )
         | 
| 14 |  | 
| 15 | 
            +
            # Initialize models: pick the compute device and the tensor precision
            # that goes with it (bfloat16 on CUDA, float32 on CPU).
            if torch.cuda.is_available():
                device, dtype = "cuda", torch.bfloat16
            else:
                device, dtype = "cpu", torch.float32

            # Access token read from the environment; None when the variable is unset.
            huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

            # SD3.5 model
            pipe = DiffusionPipeline.from_pretrained(
                "stabilityai/stable-diffusion-3.5-large",
                torch_dtype=dtype,
                use_safetensors=True,
                variant="fp16",
                token=huggingface_token,
            ).to(device)

            # Initialize Florence model (put in eval mode) and its processor
            florence_model = AutoModelForCausalLM.from_pretrained(
                'microsoft/Florence-2-base',
                trust_remote_code=True,
            ).to(device).eval()
            florence_processor = AutoProcessor.from_pretrained(
                'microsoft/Florence-2-base',
                trust_remote_code=True,
            )

            # Prompt Enhancer
            enhancer_long = pipeline(
                "summarization",
                model="gokaygokay/Lamini-Prompt-Enchance-Long",
                device=device,
            )

            # Upper bound for the seed slider / random seeds, and the largest
            # width/height offered in the UI.
            MAX_SEED = np.iinfo(np.int32).max
            MAX_IMAGE_SIZE = 1024
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            # Florence caption function
         | 
| 35 | 
             
            @spaces.GPU
            def florence_caption(image):
                """Caption *image* with the Florence-2 model.

                Args:
                    image: A PIL image, or any object accepted by ``Image.fromarray``
                        (presumably the numpy array Gradio hands over -- confirm
                        against the caller).

                Returns:
                    The entry stored under ``"<MORE_DETAILED_CAPTION>"`` in the
                    processor's post-processed output.
                """
                task_token = "<MORE_DETAILED_CAPTION>"

                # Normalise the input to a PIL image; width/height are read below.
                pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

                model_inputs = florence_processor(
                    text=task_token,
                    images=pil_image,
                    return_tensors="pt",
                ).to(device)

                # Deterministic beam search (no sampling) with 3 beams.
                token_ids = florence_model.generate(
                    input_ids=model_inputs["input_ids"],
                    pixel_values=model_inputs["pixel_values"],
                    max_new_tokens=1024,
                    early_stopping=False,
                    do_sample=False,
                    num_beams=3,
                )

                # Special tokens are kept in the decoded text that is handed to
                # post_process_generation.
                decoded = florence_processor.batch_decode(token_ids, skip_special_tokens=False)
                parsed = florence_processor.post_process_generation(
                    decoded[0],
                    task=task_token,
                    image_size=(pil_image.width, pil_image.height),
                )
                return parsed[task_token]
         | 
|  | |
| 57 |  | 
| 58 | 
            +
            # Prompt Enhancer function
         | 
| 59 | 
            +
            def enhance_prompt(input_prompt):
                """Expand *input_prompt* with the ``enhancer_long`` summarization pipeline.

                Args:
                    input_prompt: Prompt text; it is appended to the fixed
                        instruction prefix with ``+``, so it must be a ``str``.

                Returns:
                    The ``'summary_text'`` field of the first pipeline result.
                """
                request = "Enhance the description: " + input_prompt
                candidates = enhancer_long(request)
                return candidates[0]['summary_text']
         | 
| 63 |  | 
| 64 | 
            +
            @spaces.GPU(duration=190)
            def process_workflow(
                image,
                text_prompt,
                use_enhancer,
                seed,
                randomize_seed,
                width,
                height,
                guidance_scale,
                num_inference_steps,
                negative_prompt="",
                progress=gr.Progress(track_tqdm=True),
            ):
                """Build a prompt (from an image caption or from text) and render it.

                Args:
                    image: Optional input image. When given, it is captioned with
                        ``florence_caption`` and the caption becomes the prompt.
                    text_prompt: Prompt used when ``image`` is None.
                    use_enhancer: When truthy, the prompt is passed through
                        ``enhance_prompt`` before generation.
                    seed: Seed for the generator; ignored when ``randomize_seed``
                        is truthy.
                    randomize_seed: When truthy, a seed is drawn from
                        ``[0, MAX_SEED]``.
                    width, height, guidance_scale, num_inference_steps,
                    negative_prompt: Forwarded unchanged to ``pipe``.
                    progress: Not referenced in the body; presumably picked up by
                        Gradio to mirror tqdm progress -- confirm.

                Returns:
                    A ``(image, prompt, seed)`` tuple: the first generated image,
                    the prompt actually used and the seed actually used.
                """
                if image is None:
                    prompt = text_prompt
                else:
                    # Convert to PIL up front if needed, then caption the image.
                    source_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
                    prompt = florence_caption(source_image)
                    print(prompt)

                if use_enhancer:
                    prompt = enhance_prompt(prompt)

                if randomize_seed:
                    seed = random.randint(0, MAX_SEED)

                rng = torch.Generator(device=device).manual_seed(seed)

                generation = pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=rng,
                    num_inference_steps=num_inference_steps,
                    width=width,
                    height=height,
                    guidance_scale=guidance_scale,
                )

                return generation.images[0], prompt, seed
         | 
| 95 |  | 
| 96 | 
            +
            # Stylesheet passed to gr.Blocks: frames the elements carrying the
            # "input-group" / "output-group" classes and colours the "submit-btn".
            custom_css = """
            .input-group, .output-group {
                border: 1px solid #e0e0e0;
                border-radius: 10px;
                padding: 20px;
                margin-bottom: 20px;
                background-color: #f9f9f9;
            }
            .submit-btn {
                background-color: #2980b9 !important;
                color: white !important;
            }
            .submit-btn:hover {
                background-color: #3498db !important;
            }
            """

            # HTML header rendered at the top of the page, linking the three models.
            title = """<h1 align="center">Stable Diffusion 3.5 with Florence-2 Captioner and Prompt Enhancer</h1>
            <p><center>
            <a href="https://huggingface.co/stabilityai/stable-diffusion-3.5-large" target="_blank">[Stable Diffusion 3.5 Model]</a>
            <a href="https://huggingface.co/microsoft/Florence-2-base" target="_blank">[Florence-2 Model]</a>
            <a href="https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance-Long" target="_blank">[Prompt Enhancer Long]</a>
            <p align="center">Create long prompts from images or enhance your short prompts with prompt enhancer</p>
            </center></p>
            """

            # Two-column layout: inputs and settings on the left, results on the right.
            with gr.Blocks(
                css=custom_css,
                theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray"),
            ) as demo:
                gr.HTML(title)

                with gr.Row():
                    # Left column: image input, advanced settings, generate button.
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes="input-group"):
                            input_image = gr.Image(label="Input Image (Florence-2 Captioner)")

                        with gr.Accordion("Advanced Settings", open=False):
                            text_prompt = gr.Textbox(
                                label="Text Prompt (optional, used if no image is uploaded)",
                            )
                            negative_prompt = gr.Textbox(label="Negative Prompt")
                            use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
                            seed = gr.Slider(
                                label="Seed",
                                minimum=0,
                                maximum=MAX_SEED,
                                step=1,
                                value=0,
                            )
                            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                            width = gr.Slider(
                                label="Width",
                                minimum=512,
                                maximum=MAX_IMAGE_SIZE,
                                step=32,
                                value=1024,
                            )
                            height = gr.Slider(
                                label="Height",
                                minimum=512,
                                maximum=MAX_IMAGE_SIZE,
                                step=32,
                                value=1024,
                            )
                            guidance_scale = gr.Slider(
                                label="Guidance Scale",
                                minimum=0.0,
                                maximum=7.5,
                                step=0.1,
                                value=4.5,
                            )
                            num_inference_steps = gr.Slider(
                                label="Inference Steps",
                                minimum=1,
                                maximum=50,
                                step=1,
                                value=40,
                            )

                        generate_btn = gr.Button("Generate Image", elem_classes="submit-btn")

                    # Right column: generated image plus the prompt and seed used.
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes="output-group"):
                            output_image = gr.Image(
                                label="Result",
                                elem_id="gallery",
                                show_label=False,
                            )
                            final_prompt = gr.Textbox(label="Final Prompt Used")
                            used_seed = gr.Number(label="Seed Used")

                # The input list follows process_workflow's positional parameter
                # order; the outputs match its (image, prompt, seed) return tuple.
                generate_btn.click(
                    fn=process_workflow,
                    inputs=[
                        input_image,
                        text_prompt,
                        use_enhancer,
                        seed,
                        randomize_seed,
                        width,
                        height,
                        guidance_scale,
                        num_inference_steps,
                        negative_prompt,
                    ],
                    outputs=[output_image, final_prompt, used_seed],
                )

            demo.launch(debug=True)
         | 
|  | 
    	
        requirements.txt
    CHANGED
    
    | @@ -1,4 +1,11 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            accelerate
         | 
| 2 | 
            +
            diffusers
         | 
| 3 | 
            +
            torch
         | 
| 4 | 
            +
            transformers
         | 
| 5 | 
            +
            git+https://github.com/huggingface/diffusers.git
         | 
| 6 | 
            +
            sentencepiece
         | 
| 7 | 
            +
            spaces
         | 
| 8 | 
            +
            xformers
         | 
| 9 | 
            +
            sentencepiece
         | 
| 10 | 
            +
            timm
         | 
| 11 | 
            +
            einops
         |