Spaces:
Running
on
Zero
Running
on
Zero
Julian Bilcke
committed on
Commit
·
49465bb
1
Parent(s):
5f4445f
testing larger layouts
Browse files- app.py +186 -116
- page_layouts.yaml +119 -1
app.py
CHANGED
|
@@ -68,6 +68,7 @@ def get_layout_choices(num_images: int) -> List[Tuple[str, str]]:
|
|
| 68 |
key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
|
| 69 |
if key in PAGE_LAYOUTS:
|
| 70 |
return [(layout["label"], layout["id"]) for layout in PAGE_LAYOUTS[key]]
|
|
|
|
| 71 |
return [("Default", "default")]
|
| 72 |
|
| 73 |
def get_random_style_preset():
|
|
@@ -252,25 +253,96 @@ pipe.fuse_lora()
|
|
| 252 |
# --- UI Constants and Helpers ---
|
| 253 |
MAX_SEED = np.iinfo(np.int32).max
|
| 254 |
|
| 255 |
-
def
|
| 256 |
-
"""
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
# --- Session Management Functions ---
|
| 276 |
|
|
@@ -398,9 +470,19 @@ def create_single_page_pdf(images: List[Image.Image], layout_id: str, num_images
|
|
| 398 |
positions = [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]]
|
| 399 |
elif num_images == 3:
|
| 400 |
positions = [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]]
|
| 401 |
-
|
| 402 |
positions = [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
|
| 403 |
[0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
else:
|
| 405 |
positions = layout["positions"]
|
| 406 |
|
|
@@ -424,8 +506,9 @@ def create_single_page_pdf(images: List[Image.Image], layout_id: str, num_images
|
|
| 424 |
image.save(img_buffer, format='JPEG', quality=95)
|
| 425 |
img_buffer.seek(0)
|
| 426 |
|
| 427 |
-
# Draw the image on the PDF
|
| 428 |
-
|
|
|
|
| 429 |
|
| 430 |
# Save the PDF
|
| 431 |
pdf.save()
|
|
@@ -473,12 +556,11 @@ def create_multi_page_pdf(session_manager: SessionManager) -> str:
|
|
| 473 |
return str(pdf_path)
|
| 474 |
|
| 475 |
# --- Main Inference Function (with session support) ---
|
| 476 |
-
@spaces.GPU(duration=
|
| 477 |
def infer_page(
|
| 478 |
prompt,
|
| 479 |
seed=42,
|
| 480 |
randomize_seed=False,
|
| 481 |
-
aspect_ratio="1:1",
|
| 482 |
guidance_scale=1.0,
|
| 483 |
num_inference_steps=8,
|
| 484 |
prompt_enhance=True,
|
|
@@ -496,7 +578,6 @@ def infer_page(
|
|
| 496 |
prompt (str): The text prompt to generate images from.
|
| 497 |
seed (int): The seed for the random number generator for reproducibility.
|
| 498 |
randomize_seed (bool): If True, a random seed is used for each image.
|
| 499 |
-
aspect_ratio (str): The desired aspect ratio of the output images.
|
| 500 |
guidance_scale (float): Corresponds to `true_cfg_scale`.
|
| 501 |
num_inference_steps (int): The number of denoising steps.
|
| 502 |
prompt_enhance (bool): If True, the prompt is rewritten by an external LLM.
|
|
@@ -533,12 +614,17 @@ def infer_page(
|
|
| 533 |
|
| 534 |
current_seed = seed + i if not randomize_seed else random.randint(0, MAX_SEED)
|
| 535 |
|
| 536 |
-
#
|
| 537 |
-
|
|
|
|
|
|
|
|
|
|
| 538 |
prompt=prompt,
|
| 539 |
seed=current_seed,
|
| 540 |
randomize_seed=False, # We handle randomization here
|
| 541 |
-
|
|
|
|
|
|
|
| 542 |
guidance_scale=guidance_scale,
|
| 543 |
num_inference_steps=num_inference_steps,
|
| 544 |
prompt_enhance=prompt_enhance,
|
|
@@ -572,12 +658,14 @@ def infer_page(
|
|
| 572 |
|
| 573 |
return session_state, pdf_path, generated_images[0] if generated_images else None, page_info, button_label
|
| 574 |
|
| 575 |
-
#
|
| 576 |
-
def
|
| 577 |
prompt,
|
| 578 |
seed=42,
|
| 579 |
randomize_seed=False,
|
| 580 |
-
|
|
|
|
|
|
|
| 581 |
guidance_scale=1.0,
|
| 582 |
num_inference_steps=8,
|
| 583 |
prompt_enhance=True,
|
|
@@ -585,39 +673,20 @@ def infer_single(
|
|
| 585 |
custom_style_text="",
|
| 586 |
):
|
| 587 |
"""
|
| 588 |
-
Generates an image
|
| 589 |
-
|
| 590 |
-
Args:
|
| 591 |
-
prompt (str): The text prompt to generate the image from.
|
| 592 |
-
seed (int): The seed for the random number generator for reproducibility.
|
| 593 |
-
randomize_seed (bool): If True, a random seed is used.
|
| 594 |
-
aspect_ratio (str): The desired aspect ratio of the output image.
|
| 595 |
-
guidance_scale (float): Corresponds to `true_cfg_scale`. A higher value
|
| 596 |
-
encourages the model to generate images that are more closely related
|
| 597 |
-
to the prompt.
|
| 598 |
-
num_inference_steps (int): The number of denoising steps.
|
| 599 |
-
prompt_enhance (bool): If True, the prompt is rewritten by an external
|
| 600 |
-
LLM to add more detail.
|
| 601 |
-
style_preset (str): The key of the style preset to apply.
|
| 602 |
-
custom_style_text (str): Custom style text when 'no_style' is selected.
|
| 603 |
-
progress (gr.Progress): A Gradio Progress object to track the generation
|
| 604 |
-
progress in the UI.
|
| 605 |
-
|
| 606 |
-
Returns:
|
| 607 |
-
tuple[Image.Image, int]: A tuple containing the generated PIL Image and
|
| 608 |
-
the integer seed used for the generation.
|
| 609 |
"""
|
| 610 |
if randomize_seed:
|
| 611 |
seed = random.randint(0, MAX_SEED)
|
| 612 |
|
| 613 |
-
#
|
| 614 |
-
width, height =
|
| 615 |
|
| 616 |
# Set up the generator for reproducibility
|
| 617 |
generator = torch.Generator(device="cuda").manual_seed(seed)
|
| 618 |
|
| 619 |
print(f"Original prompt: '{prompt}'")
|
| 620 |
print(f"Style preset: '{style_preset}'")
|
|
|
|
| 621 |
|
| 622 |
# Apply style preset first
|
| 623 |
styled_prompt, style_negative_prompt = apply_style_preset(prompt, style_preset, custom_style_text)
|
|
@@ -646,8 +715,8 @@ def infer_single(
|
|
| 646 |
|
| 647 |
return image, seed
|
| 648 |
|
| 649 |
-
# Keep the old infer function for backward compatibility
|
| 650 |
-
infer =
|
| 651 |
|
| 652 |
# --- Examples and UI Layout ---
|
| 653 |
examples = [
|
|
@@ -698,6 +767,46 @@ with gr.Blocks(css=css) as demo:
|
|
| 698 |
run_button = gr.Button("Generate page 1", variant="primary")
|
| 699 |
reset_button = gr.Button("Start New Document", variant="secondary")
|
| 700 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
with gr.Row():
|
| 702 |
with gr.Column(scale=1):
|
| 703 |
result_preview = gr.Image(label="Preview", show_label=True, type="pil")
|
|
@@ -719,48 +828,8 @@ with gr.Blocks(css=css) as demo:
|
|
| 719 |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
| 720 |
|
| 721 |
with gr.Row():
|
| 722 |
-
aspect_ratio = gr.Radio(
|
| 723 |
-
label="Aspect ratio (width:height)",
|
| 724 |
-
choices=["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"],
|
| 725 |
-
value="16:9",
|
| 726 |
-
)
|
| 727 |
prompt_enhance = gr.Checkbox(label="Prompt Enhance", value=True)
|
| 728 |
|
| 729 |
-
with gr.Row():
|
| 730 |
-
# Create dropdown choices from loaded presets
|
| 731 |
-
style_choices = [(preset["label"], key) for key, preset in STYLE_PRESETS.items()]
|
| 732 |
-
style_preset = gr.Dropdown(
|
| 733 |
-
label="Style Preset",
|
| 734 |
-
choices=style_choices,
|
| 735 |
-
value="no_style",
|
| 736 |
-
interactive=True
|
| 737 |
-
)
|
| 738 |
-
|
| 739 |
-
custom_style_text = gr.Textbox(
|
| 740 |
-
label="Custom Style Text",
|
| 741 |
-
placeholder="Enter custom style keywords (e.g., 'oil painting, impressionist')",
|
| 742 |
-
visible=False,
|
| 743 |
-
lines=2
|
| 744 |
-
)
|
| 745 |
-
|
| 746 |
-
with gr.Row():
|
| 747 |
-
num_images_slider = gr.Slider(
|
| 748 |
-
label="Images per page",
|
| 749 |
-
minimum=1,
|
| 750 |
-
maximum=4,
|
| 751 |
-
step=1,
|
| 752 |
-
value=1,
|
| 753 |
-
info="Number of images to generate for the PDF"
|
| 754 |
-
)
|
| 755 |
-
|
| 756 |
-
layout_dropdown = gr.Dropdown(
|
| 757 |
-
label="Page Layout",
|
| 758 |
-
choices=[("Full Page", "full_page")],
|
| 759 |
-
value="full_page",
|
| 760 |
-
interactive=True,
|
| 761 |
-
info="How images are arranged on the page"
|
| 762 |
-
)
|
| 763 |
-
|
| 764 |
with gr.Row():
|
| 765 |
guidance_scale = gr.Slider(
|
| 766 |
label="Guidance scale (True CFG Scale)",
|
|
@@ -778,33 +847,35 @@ with gr.Blocks(css=css) as demo:
|
|
| 778 |
value=8,
|
| 779 |
)
|
| 780 |
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
|
| 802 |
# Update examples to show some with different styles and image counts
|
| 803 |
styled_examples = [
|
| 804 |
["A capybara wearing a suit holding a sign that reads Hello World", "no_style", "", 1],
|
| 805 |
-
["sharks raining down on san francisco", "
|
| 806 |
-
["A beautiful landscape with mountains and a lake", "
|
| 807 |
["A knight fighting a dragon", "medieval", "", 4],
|
|
|
|
|
|
|
| 808 |
]
|
| 809 |
|
| 810 |
gr.Examples(
|
|
@@ -823,7 +894,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 823 |
prompt,
|
| 824 |
seed,
|
| 825 |
randomize_seed,
|
| 826 |
-
aspect_ratio,
|
| 827 |
guidance_scale,
|
| 828 |
num_inference_steps,
|
| 829 |
prompt_enhance,
|
|
|
|
| 68 |
key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
|
| 69 |
if key in PAGE_LAYOUTS:
|
| 70 |
return [(layout["label"], layout["id"]) for layout in PAGE_LAYOUTS[key]]
|
| 71 |
+
# Fall back to a single default layout if none are configured (shouldn't happen with our config)
|
| 72 |
return [("Default", "default")]
|
| 73 |
|
| 74 |
def get_random_style_preset():
|
|
|
|
| 253 |
# --- UI Constants and Helpers ---
|
| 254 |
MAX_SEED = np.iinfo(np.int32).max
|
| 255 |
|
| 256 |
+
def get_image_size_for_position(position_data, image_index, num_images):
    """Pick a generation size in pixels that matches a layout slot's aspect ratio.

    The longer side is set to 1024 and the shorter side is derived from the
    slot's width/height ratio, never going below 256. Both sides are then
    rounded DOWN to a multiple of 64.

    Args:
        position_data: Layout slot as [x, y, width, height] in relative units,
            or None/empty when no slot is known.
        image_index: Index of the current image (0-based). Not used by the
            computation; kept so existing callers keep working.
        num_images: Total number of images in the layout. Not used by the
            computation; kept so existing callers keep working.

    Returns:
        tuple: (width, height), each a multiple of 64 in the range 256..1024.
        A missing slot, or a slot with a non-positive width or height, yields
        the default square (1024, 1024).
    """
    max_dim = 1024   # Largest allowed side
    min_dim = 256    # Smallest allowed side, kept for quality
    multiple = 64    # Sides are floored to a multiple of this value

    if not position_data:
        return max_dim, max_dim  # Default square

    _x_rel, _y_rel, w_rel, h_rel = position_data

    # A degenerate slot (zero or negative extent) has no meaningful ratio.
    # Treat it as square instead of dividing by zero further down.
    # NOTE(review): the ratio is taken from relative units only, which equals
    # the on-page ratio only if the page itself is square - confirm against
    # the PDF page size used when drawing.
    if w_rel > 0 and h_rel > 0:
        aspect_ratio = w_rel / h_rel
    else:
        aspect_ratio = 1.0

    if aspect_ratio >= 1:  # Wider than tall
        width = max_dim
        height = int(max_dim / aspect_ratio)
        if height < min_dim:
            height = min_dim
            # Cap before int() so extreme ratios cannot overflow; the value
            # would be clamped to max_dim below anyway.
            width = int(min(float(max_dim), min_dim * aspect_ratio))
    else:  # Taller than wide
        height = max_dim
        width = int(max_dim * aspect_ratio)
        if width < min_dim:
            width = min_dim
            height = int(min(float(max_dim), min_dim / aspect_ratio))

    # Round down to a multiple of 64 for better model compatibility
    width = (width // multiple) * multiple
    height = (height // multiple) * multiple

    # Keep both sides inside [min_dim, max_dim] after rounding
    width = max(min(width, max_dim), min_dim)
    height = max(min(height, max_dim), min_dim)

    return width, height
|
| 307 |
+
|
| 308 |
+
def get_layout_position_for_image(layout_id, num_images, image_index):
    """Return the relative slot rectangle for one image of a page layout.

    The slot is looked up in PAGE_LAYOUTS under the key "<n>_image(s)" for the
    layout whose "id" equals layout_id. If that layout (or that index) is not
    available, a built-in fallback table is used instead.

    Args:
        layout_id: ID of the selected layout
        num_images: Total number of images on the page
        image_index: Index of the current image (0-based)

    Returns:
        Position data [x, y, width, height] in relative units. A value is
        always returned: when neither the configured layout nor the fallback
        table has an entry for image_index, the full-page slot
        [0.05, 0.05, 0.9, 0.9] is used.
    """
    key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
    layouts = PAGE_LAYOUTS.get(key, [])
    layout = next((entry for entry in layouts if entry["id"] == layout_id), None)

    if layout and "positions" in layout:
        positions = layout["positions"]
        if image_index < len(positions):
            return positions[image_index]

    # Fallback positions for each number of images. These are kept identical
    # to the fallback positions used when the PDF page is drawn, so an image
    # is generated with the aspect ratio of the slot it is later drawn into
    # (the 3-image row previously used a different height and offset).
    fallback_positions = {
        1: [[0.05, 0.05, 0.9, 0.9]],
        2: [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]],
        3: [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]],
        4: [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
            [0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]],
        5: [[0.05, 0.05, 0.9, 0.3], [0.05, 0.4, 0.283, 0.55], [0.358, 0.4, 0.283, 0.55],
            [0.666, 0.4, 0.283, 0.275], [0.666, 0.7, 0.283, 0.275]],
        6: [[0.05, 0.05, 0.425, 0.283], [0.525, 0.05, 0.425, 0.283],
            [0.05, 0.358, 0.425, 0.283], [0.525, 0.358, 0.425, 0.283],
            [0.05, 0.666, 0.425, 0.283], [0.525, 0.666, 0.425, 0.283]],
    }

    # Unknown image counts reuse the single full-page slot.
    positions = fallback_positions.get(num_images, fallback_positions[1])
    if image_index < len(positions):
        return positions[image_index]
    return [0.05, 0.05, 0.9, 0.9]  # Ultimate default
|
| 346 |
|
| 347 |
# --- Session Management Functions ---
|
| 348 |
|
|
|
|
| 470 |
positions = [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]]
|
| 471 |
elif num_images == 3:
|
| 472 |
positions = [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]]
|
| 473 |
+
elif num_images == 4:
|
| 474 |
positions = [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
|
| 475 |
[0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]]
|
| 476 |
+
elif num_images == 5:
|
| 477 |
+
positions = [[0.05, 0.05, 0.9, 0.3], [0.05, 0.4, 0.283, 0.55], [0.358, 0.4, 0.283, 0.55],
|
| 478 |
+
[0.666, 0.4, 0.283, 0.275], [0.666, 0.7, 0.283, 0.275]]
|
| 479 |
+
elif num_images == 6:
|
| 480 |
+
positions = [[0.05, 0.05, 0.425, 0.283], [0.525, 0.05, 0.425, 0.283],
|
| 481 |
+
[0.05, 0.358, 0.425, 0.283], [0.525, 0.358, 0.425, 0.283],
|
| 482 |
+
[0.05, 0.666, 0.425, 0.283], [0.525, 0.666, 0.425, 0.283]]
|
| 483 |
+
else:
|
| 484 |
+
# For more than 6, fall back to a single full-page slot (no grid is generated)
|
| 485 |
+
positions = [[0.05, 0.05, 0.9, 0.9]]
|
| 486 |
else:
|
| 487 |
positions = layout["positions"]
|
| 488 |
|
|
|
|
| 506 |
image.save(img_buffer, format='JPEG', quality=95)
|
| 507 |
img_buffer.seek(0)
|
| 508 |
|
| 509 |
+
# Draw the image on the PDF - fill the entire allocated space
|
| 510 |
+
# preserveAspectRatio=False to fill the entire area
|
| 511 |
+
pdf.drawImage(ImageReader(img_buffer), x, y, width=width, height=height, preserveAspectRatio=False, mask='auto')
|
| 512 |
|
| 513 |
# Save the PDF
|
| 514 |
pdf.save()
|
|
|
|
| 556 |
return str(pdf_path)
|
| 557 |
|
| 558 |
# --- Main Inference Function (with session support) ---
|
| 559 |
+
@spaces.GPU(duration=180) # Increased duration for up to 6 images
|
| 560 |
def infer_page(
|
| 561 |
prompt,
|
| 562 |
seed=42,
|
| 563 |
randomize_seed=False,
|
|
|
|
| 564 |
guidance_scale=1.0,
|
| 565 |
num_inference_steps=8,
|
| 566 |
prompt_enhance=True,
|
|
|
|
| 578 |
prompt (str): The text prompt to generate images from.
|
| 579 |
seed (int): The seed for the random number generator for reproducibility.
|
| 580 |
randomize_seed (bool): If True, a random seed is used for each image.
|
|
|
|
| 581 |
guidance_scale (float): Corresponds to `true_cfg_scale`.
|
| 582 |
num_inference_steps (int): The number of denoising steps.
|
| 583 |
prompt_enhance (bool): If True, the prompt is rewritten by an external LLM.
|
|
|
|
| 614 |
|
| 615 |
current_seed = seed + i if not randomize_seed else random.randint(0, MAX_SEED)
|
| 616 |
|
| 617 |
+
# Get optimal aspect ratio based on position in layout
|
| 618 |
+
position_data = get_layout_position_for_image(layout, int(num_images), i)
|
| 619 |
+
|
| 620 |
+
# Generate single image with automatic aspect ratio
|
| 621 |
+
image, used_seed = infer_single_auto(
|
| 622 |
prompt=prompt,
|
| 623 |
seed=current_seed,
|
| 624 |
randomize_seed=False, # We handle randomization here
|
| 625 |
+
position_data=position_data,
|
| 626 |
+
image_index=i,
|
| 627 |
+
num_images=int(num_images),
|
| 628 |
guidance_scale=guidance_scale,
|
| 629 |
num_inference_steps=num_inference_steps,
|
| 630 |
prompt_enhance=prompt_enhance,
|
|
|
|
| 658 |
|
| 659 |
return session_state, pdf_path, generated_images[0] if generated_images else None, page_info, button_label
|
| 660 |
|
| 661 |
+
# New inference function with automatic aspect ratio
|
| 662 |
+
def infer_single_auto(
|
| 663 |
prompt,
|
| 664 |
seed=42,
|
| 665 |
randomize_seed=False,
|
| 666 |
+
position_data=None,
|
| 667 |
+
image_index=0,
|
| 668 |
+
num_images=1,
|
| 669 |
guidance_scale=1.0,
|
| 670 |
num_inference_steps=8,
|
| 671 |
prompt_enhance=True,
|
|
|
|
| 673 |
custom_style_text="",
|
| 674 |
):
|
| 675 |
"""
|
| 676 |
+
Generates an image with automatically determined aspect ratio based on layout position.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
"""
|
| 678 |
if randomize_seed:
|
| 679 |
seed = random.randint(0, MAX_SEED)
|
| 680 |
|
| 681 |
+
# Automatically determine image size based on position
|
| 682 |
+
width, height = get_image_size_for_position(position_data, image_index, num_images)
|
| 683 |
|
| 684 |
# Set up the generator for reproducibility
|
| 685 |
generator = torch.Generator(device="cuda").manual_seed(seed)
|
| 686 |
|
| 687 |
print(f"Original prompt: '{prompt}'")
|
| 688 |
print(f"Style preset: '{style_preset}'")
|
| 689 |
+
print(f"Auto-selected size based on layout: {width}x{height}")
|
| 690 |
|
| 691 |
# Apply style preset first
|
| 692 |
styled_prompt, style_negative_prompt = apply_style_preset(prompt, style_preset, custom_style_text)
|
|
|
|
| 715 |
|
| 716 |
return image, seed
|
| 717 |
|
| 718 |
+
# Keep the old infer function for backward compatibility (simplified)
|
| 719 |
+
infer = infer_single_auto
|
| 720 |
|
| 721 |
# --- Examples and UI Layout ---
|
| 722 |
examples = [
|
|
|
|
| 767 |
run_button = gr.Button("Generate page 1", variant="primary")
|
| 768 |
reset_button = gr.Button("Start New Document", variant="secondary")
|
| 769 |
|
| 770 |
+
# New row for Style Preset and Page Layout
|
| 771 |
+
with gr.Row():
|
| 772 |
+
with gr.Column(scale=1):
|
| 773 |
+
# Number of images slider (affects layout choices)
|
| 774 |
+
num_images_slider = gr.Slider(
|
| 775 |
+
label="Images per page",
|
| 776 |
+
minimum=1,
|
| 777 |
+
maximum=6,
|
| 778 |
+
step=1,
|
| 779 |
+
value=1,
|
| 780 |
+
info="Number of images to generate for the PDF (1-6)"
|
| 781 |
+
)
|
| 782 |
+
|
| 783 |
+
with gr.Column(scale=2):
|
| 784 |
+
layout_dropdown = gr.Dropdown(
|
| 785 |
+
label="Page Layout",
|
| 786 |
+
choices=[("Full Page", "full_page")],
|
| 787 |
+
value="full_page",
|
| 788 |
+
interactive=True,
|
| 789 |
+
info="How images are arranged on the page"
|
| 790 |
+
)
|
| 791 |
+
|
| 792 |
+
with gr.Column(scale=2):
|
| 793 |
+
# Create dropdown choices from loaded presets
|
| 794 |
+
style_choices = [(preset["label"], key) for key, preset in STYLE_PRESETS.items()]
|
| 795 |
+
style_preset = gr.Dropdown(
|
| 796 |
+
label="Style Preset",
|
| 797 |
+
choices=style_choices,
|
| 798 |
+
value="no_style",
|
| 799 |
+
interactive=True
|
| 800 |
+
)
|
| 801 |
+
|
| 802 |
+
with gr.Column(scale=2):
|
| 803 |
+
custom_style_text = gr.Textbox(
|
| 804 |
+
label="Custom Style Text",
|
| 805 |
+
placeholder="Enter custom style (e.g., 'oil painting')",
|
| 806 |
+
visible=False,
|
| 807 |
+
lines=1
|
| 808 |
+
)
|
| 809 |
+
|
| 810 |
with gr.Row():
|
| 811 |
with gr.Column(scale=1):
|
| 812 |
result_preview = gr.Image(label="Preview", show_label=True, type="pil")
|
|
|
|
| 828 |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
| 829 |
|
| 830 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
prompt_enhance = gr.Checkbox(label="Prompt Enhance", value=True)
|
| 832 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
with gr.Row():
|
| 834 |
guidance_scale = gr.Slider(
|
| 835 |
label="Guidance scale (True CFG Scale)",
|
|
|
|
| 847 |
value=8,
|
| 848 |
)
|
| 849 |
|
| 850 |
+
# Add interaction to show/hide custom style text field
|
| 851 |
+
def toggle_custom_style(style_value):
    """Show the custom style textbox only while the "no_style" preset is selected."""
    show_custom_text = style_value == "no_style"
    return gr.update(visible=show_custom_text)
|
| 853 |
|
| 854 |
+
style_preset.change(
|
| 855 |
+
fn=toggle_custom_style,
|
| 856 |
+
inputs=[style_preset],
|
| 857 |
+
outputs=[custom_style_text]
|
| 858 |
+
)
|
| 859 |
|
| 860 |
+
# Update layout dropdown when number of images changes
|
| 861 |
+
def update_layout_choices(num_images):
    """Refresh the layout dropdown for the given image count.

    The dropdown receives the (label, id) pairs from get_layout_choices and
    selects the id of the first pair, or "default" when there are none.
    """
    available = get_layout_choices(int(num_images))
    if available:
        selected_id = available[0][1]
    else:
        selected_id = "default"
    return gr.update(choices=available, value=selected_id)
|
| 864 |
|
| 865 |
+
num_images_slider.change(
|
| 866 |
+
fn=update_layout_choices,
|
| 867 |
+
inputs=[num_images_slider],
|
| 868 |
+
outputs=[layout_dropdown]
|
| 869 |
+
)
|
| 870 |
|
| 871 |
# Update examples to show some with different styles and image counts
|
| 872 |
styled_examples = [
|
| 873 |
["A capybara wearing a suit holding a sign that reads Hello World", "no_style", "", 1],
|
| 874 |
+
["sharks raining down on san francisco", "anime", "", 2],
|
| 875 |
+
["A beautiful landscape with mountains and a lake", "watercolor", "", 3],
|
| 876 |
["A knight fighting a dragon", "medieval", "", 4],
|
| 877 |
+
["Space battle with laser beams", "sci-fi", "", 5],
|
| 878 |
+
["Detective investigating a mystery", "noir", "", 6],
|
| 879 |
]
|
| 880 |
|
| 881 |
gr.Examples(
|
|
|
|
| 894 |
prompt,
|
| 895 |
seed,
|
| 896 |
randomize_seed,
|
|
|
|
| 897 |
guidance_scale,
|
| 898 |
num_inference_steps,
|
| 899 |
prompt_enhance,
|
page_layouts.yaml
CHANGED
|
@@ -124,4 +124,122 @@ layouts:
|
|
| 124 |
- [0.05, 0.05, 0.425, 0.425] # Top left (large)
|
| 125 |
- [0.525, 0.05, 0.425, 0.425] # Top right (large)
|
| 126 |
- [0.05, 0.525, 0.425, 0.425] # Bottom left
|
| 127 |
-
- [0.525, 0.7, 0.425, 0.25] # Bottom right (small)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
- [0.05, 0.05, 0.425, 0.425] # Top left (large)
|
| 125 |
- [0.525, 0.05, 0.425, 0.425] # Top right (large)
|
| 126 |
- [0.05, 0.525, 0.425, 0.425] # Bottom left
|
| 127 |
+
- [0.525, 0.7, 0.425, 0.25] # Bottom right (small)
|
| 128 |
+
|
| 129 |
+
5_images:
|
| 130 |
+
- id: "us_comic_action"
|
| 131 |
+
label: "US Comic - Action Scene"
|
| 132 |
+
description: "Classic American superhero comic layout with large establishing shot"
|
| 133 |
+
positions:
|
| 134 |
+
- [0.05, 0.05, 0.9, 0.4] # Wide establishing shot (panoramic)
|
| 135 |
+
- [0.05, 0.5, 0.283, 0.45] # Action panel 1
|
| 136 |
+
- [0.358, 0.5, 0.283, 0.45] # Action panel 2
|
| 137 |
+
- [0.666, 0.5, 0.283, 0.225] # Close-up 1
|
| 138 |
+
- [0.666, 0.75, 0.283, 0.225] # Close-up 2
|
| 139 |
+
|
| 140 |
+
- id: "manga_vertical_flow"
|
| 141 |
+
label: "Manga - Vertical Flow"
|
| 142 |
+
description: "Japanese manga style with vertical reading flow"
|
| 143 |
+
positions:
|
| 144 |
+
- [0.525, 0.05, 0.425, 0.35] # Top right (read first in manga)
|
| 145 |
+
- [0.05, 0.05, 0.425, 0.35] # Top left
|
| 146 |
+
- [0.525, 0.45, 0.425, 0.25] # Middle right
|
| 147 |
+
- [0.05, 0.45, 0.425, 0.25] # Middle left
|
| 148 |
+
- [0.05, 0.75, 0.9, 0.2] # Bottom wide panel
|
| 149 |
+
|
| 150 |
+
- id: "euro_bd_grid"
|
| 151 |
+
label: "European BD - Clear Grid"
|
| 152 |
+
description: "Franco-Belgian clear line style with regular panels"
|
| 153 |
+
positions:
|
| 154 |
+
- [0.05, 0.05, 0.425, 0.283] # Row 1 left
|
| 155 |
+
- [0.525, 0.05, 0.425, 0.283] # Row 1 right
|
| 156 |
+
- [0.05, 0.358, 0.9, 0.283] # Row 2 wide
|
| 157 |
+
- [0.05, 0.666, 0.425, 0.283] # Row 3 left
|
| 158 |
+
- [0.525, 0.666, 0.425, 0.283] # Row 3 right
|
| 159 |
+
|
| 160 |
+
- id: "diagonal_dynamic"
|
| 161 |
+
label: "Dynamic Diagonal"
|
| 162 |
+
description: "Action-oriented diagonal composition"
|
| 163 |
+
positions:
|
| 164 |
+
- [0.05, 0.05, 0.5, 0.4] # Large top left
|
| 165 |
+
- [0.6, 0.05, 0.35, 0.25] # Small top right
|
| 166 |
+
- [0.3, 0.35, 0.4, 0.3] # Center focus
|
| 167 |
+
- [0.05, 0.7, 0.35, 0.25] # Bottom left
|
| 168 |
+
- [0.6, 0.7, 0.35, 0.25] # Bottom right
|
| 169 |
+
|
| 170 |
+
- id: "spiral_focus"
|
| 171 |
+
label: "Spiral Focus"
|
| 172 |
+
description: "Panels arranged in a spiral leading to center"
|
| 173 |
+
positions:
|
| 174 |
+
- [0.05, 0.05, 0.35, 0.35] # Top left
|
| 175 |
+
- [0.425, 0.05, 0.525, 0.25] # Top wide
|
| 176 |
+
- [0.7, 0.35, 0.25, 0.6] # Right tall
|
| 177 |
+
- [0.425, 0.7, 0.525, 0.25] # Bottom wide
|
| 178 |
+
- [0.25, 0.35, 0.4, 0.3] # Center focus
|
| 179 |
+
|
| 180 |
+
6_images:
|
| 181 |
+
- id: "classic_comic_grid"
|
| 182 |
+
label: "Classic Comic Grid"
|
| 183 |
+
description: "Traditional 2x3 American comic book grid"
|
| 184 |
+
positions:
|
| 185 |
+
- [0.05, 0.05, 0.425, 0.283] # Row 1 left
|
| 186 |
+
- [0.525, 0.05, 0.425, 0.283] # Row 1 right
|
| 187 |
+
- [0.05, 0.358, 0.425, 0.283] # Row 2 left
|
| 188 |
+
- [0.525, 0.358, 0.425, 0.283] # Row 2 right
|
| 189 |
+
- [0.05, 0.666, 0.425, 0.283] # Row 3 left
|
| 190 |
+
- [0.525, 0.666, 0.425, 0.283] # Row 3 right
|
| 191 |
+
|
| 192 |
+
- id: "manga_4koma"
|
| 193 |
+
label: "Manga - 4-Koma Plus"
|
| 194 |
+
description: "Japanese 4-panel strip with header and footer"
|
| 195 |
+
positions:
|
| 196 |
+
- [0.05, 0.05, 0.9, 0.15] # Header panel
|
| 197 |
+
- [0.05, 0.25, 0.425, 0.2] # Strip 1
|
| 198 |
+
- [0.525, 0.25, 0.425, 0.2] # Strip 2
|
| 199 |
+
- [0.05, 0.5, 0.425, 0.2] # Strip 3
|
| 200 |
+
- [0.525, 0.5, 0.425, 0.2] # Strip 4
|
| 201 |
+
- [0.05, 0.75, 0.9, 0.2] # Footer/punchline
|
| 202 |
+
|
| 203 |
+
- id: "euro_bd_cinematic"
|
| 204 |
+
label: "European BD - Cinematic"
|
| 205 |
+
description: "Cinematic European style with varied panel sizes"
|
| 206 |
+
positions:
|
| 207 |
+
- [0.05, 0.05, 0.9, 0.25] # Wide establishing
|
| 208 |
+
- [0.05, 0.35, 0.283, 0.25] # Small 1
|
| 209 |
+
- [0.358, 0.35, 0.283, 0.25] # Small 2
|
| 210 |
+
- [0.666, 0.35, 0.283, 0.25] # Small 3
|
| 211 |
+
- [0.05, 0.65, 0.425, 0.3] # Medium left
|
| 212 |
+
- [0.525, 0.65, 0.425, 0.3] # Medium right
|
| 213 |
+
|
| 214 |
+
- id: "action_sequence"
|
| 215 |
+
label: "Action Sequence"
|
| 216 |
+
description: "Fast-paced action scene layout"
|
| 217 |
+
positions:
|
| 218 |
+
- [0.05, 0.05, 0.6, 0.35] # Large action shot
|
| 219 |
+
- [0.7, 0.05, 0.25, 0.175] # Speed line 1
|
| 220 |
+
- [0.7, 0.25, 0.25, 0.175] # Speed line 2
|
| 221 |
+
- [0.05, 0.45, 0.283, 0.5] # Vertical impact 1
|
| 222 |
+
- [0.358, 0.45, 0.283, 0.5] # Vertical impact 2
|
| 223 |
+
- [0.666, 0.45, 0.283, 0.5] # Vertical impact 3
|
| 224 |
+
|
| 225 |
+
- id: "storytelling_flow"
|
| 226 |
+
label: "Storytelling Flow"
|
| 227 |
+
description: "Natural reading flow for narrative scenes"
|
| 228 |
+
positions:
|
| 229 |
+
- [0.05, 0.05, 0.425, 0.25] # Scene 1
|
| 230 |
+
- [0.525, 0.05, 0.425, 0.25] # Scene 2
|
| 231 |
+
- [0.05, 0.35, 0.9, 0.2] # Wide transition
|
| 232 |
+
- [0.05, 0.6, 0.425, 0.35] # Scene 3
|
| 233 |
+
- [0.525, 0.6, 0.425, 0.175] # Scene 4a
|
| 234 |
+
- [0.525, 0.8, 0.425, 0.175] # Scene 4b
|
| 235 |
+
|
| 236 |
+
- id: "focus_surround"
|
| 237 |
+
label: "Focus with Details"
|
| 238 |
+
description: "Central focus with surrounding detail panels"
|
| 239 |
+
positions:
|
| 240 |
+
- [0.25, 0.25, 0.5, 0.5] # Large center focus
|
| 241 |
+
- [0.05, 0.05, 0.35, 0.15] # Top left detail
|
| 242 |
+
- [0.6, 0.05, 0.35, 0.15] # Top right detail
|
| 243 |
+
- [0.05, 0.8, 0.35, 0.15] # Bottom left detail
|
| 244 |
+
- [0.6, 0.8, 0.35, 0.15] # Bottom right detail
|
| 245 |
+
- [0.05, 0.4, 0.15, 0.3] # Left side detail
|