import os
import math
import random
import tempfile

import numpy as np
import torch
import spaces
import gradio as gr
from gradio_client import Client, handle_file
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from huggingface_hub import hf_hub_download, InferenceClient
from safetensors.torch import load_file

from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

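# Load Qwen-Image-Edit-2509, swapping in the Rapid-AIO transformer checkpoint so edits run in a few denoising steps.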
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=QwenImageTransformer2DModel.from_pretrained(
        "linoyts/Qwen-Image-Edit-Rapid-AIO",
        subfolder="transformer",
        torch_dtype=dtype,
        device_map="cuda",
    ),
    torch_dtype=dtype,
).to(device)

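# Fuse the Relight LoRA into the transformer weights, then drop the adapter so no LoRA overhead remains at inference time.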
pipe.load_lora_weights(
    "dx8152/Qwen-Image-Edit-2509-Relight",
    weight_name="Qwen-Edit-Relight.safetensors",
    adapter_name="relight",
)
pipe.set_adapters(["relight"], adapter_weights=[1.0])
pipe.fuse_lora(adapter_names=["relight"], lora_scale=1.25)
pipe.unload_lora_weights()

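# Restore the stock transformer class and switch attention to the FlashAttention-3 double-stream processor.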
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

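# One-time warm-up call to optimize_pipeline_ with representative input shapes.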
optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
    prompt="prompt",
)

MAX_SEED = np.iinfo(np.int32).max

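# Serverless InferenceClient used by translate_to_chinese() below; relighting prompts
# are composed in Chinese, so free-form user prompts are translated first.
# Requires HF_TOKEN to be set in the environment.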
translation_client = InferenceClient(
    api_key=os.environ.get("HF_TOKEN"),
)


def translate_to_chinese(text: str) -> str:
    """Translate text in any language to Chinese using a Qwen model via the HF Inference API."""
    if not text or not text.strip():
        return ""

    # Skip translation if the text is already predominantly Chinese.
    chinese_chars = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')
    if chinese_chars / max(len(text), 1) > 0.5:
        return text

    try:
        completion = translation_client.chat.completions.create(
            model="Qwen/Qwen3-Next-80B-A3B-Instruct:novita",
            messages=[
                {
                    "role": "system",
                    "content": "You are a professional translator. Translate the user's text to Chinese. Only output the translated text, nothing else."
                },
                {
                    "role": "user",
                    "content": f"Translate this to Chinese: {text}"
                }
            ],
            max_tokens=500,
        )

        translated = completion.choices[0].message.content.strip()
        print(f"Translated '{text}' to '{translated}'")
        return translated
    except Exception as e:
        print(f"Translation error: {e}")
        # Fall back to the original text if translation fails.
        return text


def _generate_video_segment(input_image_path: str, output_image_path: str, prompt: str, request: gr.Request) -> str:
    """Generates a single video segment using the external service."""
    x_ip_token = request.headers['x-ip-token']
    video_client = Client("multimodalart/wan-2-2-first-last-frame", headers={"x-ip-token": x_ip_token})
    result = video_client.predict(
        start_image_pil=handle_file(input_image_path),
        end_image_pil=handle_file(output_image_path),
        prompt=prompt,
        api_name="/generate_video",
    )
    return result[0]["video"]


def build_relight_prompt(light_type, light_direction, light_intensity, prompt):
    """Build the relighting prompt based on user selections."""
    # A custom prompt overrides the dropdown controls: translate it to Chinese
    # and make sure the "重新照明" (relight) trigger phrase is present.
    if prompt and prompt.strip():
        translated = translate_to_chinese(prompt)
        if "重新照明" not in translated:
            return f"重新照明,{translated}"
        return translated

    prompt_parts = ["重新照明"]  # "relight" trigger phrase

    light_descriptions = {
        "soft_window": "使用窗帘透光(柔和漫射)的光线",  # soft, diffused light through curtains
        "golden_hour": "使用金色黄昏的温暖光线",  # warm golden-hour light
        "studio": "使用专业摄影棚的均匀光线",  # even studio lighting
        "dramatic": "使用戏剧性的高对比度光线",  # dramatic, high-contrast light
        "natural": "使用自然日光",  # natural daylight
        "neon": "使用霓虹灯光效果",  # neon lighting
        "candlelight": "使用烛光的温暖氛围",  # warm candlelight
        "moonlight": "使用月光的冷色调",  # cool moonlight
    }

    direction_descriptions = {
        "front": "从正面照射",  # lit from the front
        "side": "从侧面照射",  # lit from the side
        "back": "从背后照射",  # backlit
        "top": "从上方照射",  # lit from above
        "bottom": "从下方照射",  # lit from below
    }

    intensity_descriptions = {
        "soft": "柔和强度",  # soft intensity
        "medium": "中等强度",  # medium intensity
        "strong": "强烈强度",  # strong intensity
    }

    if light_type != "none":
        prompt_parts.append(light_descriptions.get(light_type, ""))

    if light_direction != "none":
        prompt_parts.append(direction_descriptions.get(light_direction, ""))

    if light_intensity != "none":
        prompt_parts.append(intensity_descriptions.get(light_intensity, ""))

    final_prompt = ",".join([p for p in prompt_parts if p])

    if len(prompt_parts) > 1:
        final_prompt += "对图片进行重新照明"  # "...relight the image"

    # Default when no controls are selected: relight with natural light.
    return final_prompt if len(prompt_parts) > 1 else "重新照明,使用自然光线对图片进行重新照明"


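# Main relighting inference; @spaces.GPU schedules the call on ZeroGPU hardware.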
@spaces.GPU
def infer_relight(
    image,
    light_type,
    light_direction,
    light_intensity,
    prompt,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
    prev_output=None,
    progress=gr.Progress(track_tqdm=True),
):
    final_prompt = build_relight_prompt(light_type, light_direction, light_intensity, prompt)
    print(f"Generated Prompt: {final_prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Collect the conditioning image: prefer the upload, fall back to the previous output.
    pil_images = []
    if image is not None:
        if isinstance(image, Image.Image):
            pil_images.append(image.convert("RGB"))
        elif hasattr(image, "name"):
            pil_images.append(Image.open(image.name).convert("RGB"))
    elif prev_output is not None:
        prev = prev_output if isinstance(prev_output, Image.Image) else Image.fromarray(np.asarray(prev_output).astype("uint8"))
        pil_images.append(prev.convert("RGB"))

    if len(pil_images) == 0:
        raise gr.Error("Please upload an image first.")

    result = pipe(
        image=pil_images,
        prompt=final_prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, final_prompt


def create_video_between_images(input_image, output_image, prompt: str, request: gr.Request) -> str:
    """Create a video between the input and output images."""
    if input_image is None or output_image is None:
        raise gr.Error("Both input and output images are required to create a video.")

    try:
        # The external video service expects file paths, so write both frames to temp files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            input_image.save(tmp.name)
            input_image_path = tmp.name

        output_pil = Image.fromarray(output_image.astype('uint8'))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            output_pil.save(tmp.name)
            output_image_path = tmp.name

        video_path = _generate_video_segment(
            input_image_path,
            output_image_path,
            prompt if prompt else "Relighting transformation",
            request,
        )
        return video_path
    except Exception as e:
        raise gr.Error(f"Video generation failed: {e}")


css = '''#col-container { max-width: 800px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 800px; margin: 0 auto; }'''


def reset_all():
    # Clear the lighting controls and raise the is_reset flag so the
    # dropdowns' auto-inference is skipped while their values change.
    return ["none", "none", "none", "", True]


def end_reset():
    return False


def update_dimensions_on_upload(image):
    if image is None:
        return 1024, 1024

    original_width, original_height = image.size

    # Scale so the longer side is 1024 while preserving the aspect ratio.
    if original_width > original_height:
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)

    # Snap to multiples of 8 as required by the pipeline.
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8

    return new_width, new_height


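# Build the Gradio UI.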
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 💡 Qwen Image Edit — Relighting Control")
        gr.Markdown("""
        Qwen Image Edit 2509 for Image Relighting ✨
        Using [dx8152's Qwen-Image-Edit-2509-Relight LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Relight) and [linoyts/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/linoyts/Qwen-Image-Edit-Rapid-AIO) for 4-step inference 💨
        """)

        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil")
                prev_output = gr.Image(value=None, visible=False)
                is_reset = gr.Checkbox(value=False, visible=False)

                with gr.Tab("Lighting Controls"):
                    light_type = gr.Dropdown(
                        label="Light Type",
                        choices=[
                            ("None", "none"),
                            ("Soft Window Light (柔和窗光)", "soft_window"),
                            ("Golden Hour (金色黄昏)", "golden_hour"),
                            ("Studio Lighting (摄影棚灯光)", "studio"),
                            ("Dramatic (戏剧性)", "dramatic"),
                            ("Natural Daylight (自然日光)", "natural"),
                            ("Neon (霓虹灯)", "neon"),
                            ("Candlelight (烛光)", "candlelight"),
                            ("Moonlight (月光)", "moonlight"),
                        ],
                        value="none"
                    )

                    light_direction = gr.Dropdown(
                        label="Light Direction",
                        choices=[
                            ("None", "none"),
                            ("Front (正面)", "front"),
                            ("Side (侧面)", "side"),
                            ("Back (背光)", "back"),
                            ("Top (上方)", "top"),
                            ("Bottom (下方)", "bottom"),
                        ],
                        value="none"
                    )

                    light_intensity = gr.Dropdown(
                        label="Light Intensity",
                        choices=[
                            ("None", "none"),
                            ("Soft (柔和)", "soft"),
                            ("Medium (中等)", "medium"),
                            ("Strong (强烈)", "strong"),
                        ],
                        value="none"
                    )

                with gr.Tab("Custom Prompt"):
                    prompt = gr.Textbox(
                        label="Relighting Prompt",
                        placeholder="Example: Add warm sunset lighting from the right",
                        lines=3
                    )

                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)

            with gr.Column():
                result = gr.Image(label="Output Image", interactive=False)
                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
                create_video_button = gr.Button("🎥 Create Video Between Images", variant="secondary", visible=False)
                with gr.Group(visible=False) as video_group:
                    video_output = gr.Video(label="Generated Video", show_download_button=True, autoplay=True)

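    # Shared input/output component lists for the event handlers below.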
    inputs = [
        image, light_type, light_direction, light_intensity, prompt,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
    ]
    outputs = [result, seed, prompt_preview]

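    # Reset clears the controls; is_reset stays True only for the duration of the chain
    # so the dropdowns' auto-inference (maybe_infer) is skipped while values change.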
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[light_type, light_direction, light_intensity, prompt, is_reset],
        queue=False
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)

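    # Generate on click and reveal the video button only when both an input image and a result exist.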
    def infer_and_show_video_button(*args):
        result_img, result_seed, result_prompt = infer_relight(*args)
        show_button = args[0] is not None and result_img is not None
        return result_img, result_seed, result_prompt, gr.update(visible=show_button)

    run_event = run_btn.click(
        fn=infer_and_show_video_button,
        inputs=inputs,
        outputs=outputs + [create_video_button]
    )

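    # Show the video panel, then interpolate between the input and the relit output
    # via the external Wan 2.2 first/last-frame Space.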
    create_video_button.click(
        fn=lambda: gr.update(visible=True),
        outputs=[video_group],
        api_name=False
    ).then(
        fn=create_video_between_images,
        inputs=[image, result, prompt_preview],
        outputs=[video_output],
        api_name=False
    )

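    # Preset lighting configurations (upload an image, then pick an example).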
    gr.Examples(
        examples=[
            [None, "soft_window", "side", "soft", "", 0, True, 1.0, 4, 1024, 1024],
            [None, "golden_hour", "front", "medium", "", 0, True, 1.0, 4, 1024, 1024],
            [None, "dramatic", "side", "strong", "", 0, True, 1.0, 4, 1024, 1024],
            [None, "neon", "front", "medium", "", 0, True, 1.0, 4, 1024, 1024],
            [None, "candlelight", "front", "soft", "", 0, True, 1.0, 4, 1024, 1024],
        ],
        inputs=[image, light_type, light_direction, light_intensity, prompt,
                seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width],
        outputs=outputs,
        fn=infer_relight,
        cache_examples="lazy",
        elem_id="examples"
    )

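    # On upload: snap width/height to the image's aspect ratio (multiples of 8), then reset the controls.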
    image.upload(
        fn=update_dimensions_on_upload,
        inputs=[image],
        outputs=[width, height]
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[light_type, light_direction, light_intensity, prompt, is_reset],
        queue=False
    ).then(
        fn=end_reset,
        inputs=None,
        outputs=[is_reset],
        queue=False
    )

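    # Changing a lighting dropdown re-runs inference immediately, unless a reset is in progress.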
    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
        if is_reset:
            return gr.update(), gr.update(), gr.update(), gr.update()
        else:
            result_img, result_seed, result_prompt = infer_relight(*args)
            show_button = args[0] is not None and result_img is not None
            return result_img, result_seed, result_prompt, gr.update(visible=show_button)

    control_inputs = [
        image, light_type, light_direction, light_intensity, prompt,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
    ]
    control_inputs_with_flag = [is_reset] + control_inputs

    for control in [light_type, light_direction, light_intensity]:
        control.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs + [create_video_button])

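    # Cache the latest result so follow-up edits can chain from it when no new image is uploaded.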
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])


demo.launch()