Julian Bilcke committed on
Commit
49465bb
·
1 Parent(s): 5f4445f

testing larger layouts

Browse files
Files changed (2) hide show
  1. app.py +186 -116
  2. page_layouts.yaml +119 -1
app.py CHANGED
@@ -68,6 +68,7 @@ def get_layout_choices(num_images: int) -> List[Tuple[str, str]]:
68
  key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
69
  if key in PAGE_LAYOUTS:
70
  return [(layout["label"], layout["id"]) for layout in PAGE_LAYOUTS[key]]
 
71
  return [("Default", "default")]
72
 
73
  def get_random_style_preset():
@@ -252,25 +253,96 @@ pipe.fuse_lora()
252
  # --- UI Constants and Helpers ---
253
  MAX_SEED = np.iinfo(np.int32).max
254
 
255
def get_image_size(aspect_ratio):
    """Map an aspect-ratio label to a (width, height) pair built around a 1024 base.

    Args:
        aspect_ratio: Label such as "1:1", "16:9" or "2:3".

    Returns:
        tuple: (width, height) in pixels. Labels that are not recognised
        produce the square 1024x1024 size.
    """
    known_sizes = (
        ("1:1", (1024, 1024)),
        ("16:9", (1152, 640)),
        ("9:16", (640, 1152)),
        ("4:3", (1024, 768)),
        ("3:4", (768, 1024)),
        ("3:2", (1024, 688)),
        ("2:3", (688, 1024)),
    )
    for label, size in known_sizes:
        if aspect_ratio == label:
            return size
    # Default to 1:1 if something goes wrong
    return 1024, 1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
  # --- Session Management Functions ---
276
 
@@ -398,9 +470,19 @@ def create_single_page_pdf(images: List[Image.Image], layout_id: str, num_images
398
  positions = [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]]
399
  elif num_images == 3:
400
  positions = [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]]
401
- else:
402
  positions = [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
403
  [0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]]
 
 
 
 
 
 
 
 
 
 
404
  else:
405
  positions = layout["positions"]
406
 
@@ -424,8 +506,9 @@ def create_single_page_pdf(images: List[Image.Image], layout_id: str, num_images
424
  image.save(img_buffer, format='JPEG', quality=95)
425
  img_buffer.seek(0)
426
 
427
- # Draw the image on the PDF
428
- pdf.drawImage(ImageReader(img_buffer), x, y, width=width, height=height, preserveAspectRatio=True)
 
429
 
430
  # Save the PDF
431
  pdf.save()
@@ -473,12 +556,11 @@ def create_multi_page_pdf(session_manager: SessionManager) -> str:
473
  return str(pdf_path)
474
 
475
  # --- Main Inference Function (with session support) ---
476
- @spaces.GPU(duration=120) # Increased duration for multiple images
477
  def infer_page(
478
  prompt,
479
  seed=42,
480
  randomize_seed=False,
481
- aspect_ratio="1:1",
482
  guidance_scale=1.0,
483
  num_inference_steps=8,
484
  prompt_enhance=True,
@@ -496,7 +578,6 @@ def infer_page(
496
  prompt (str): The text prompt to generate images from.
497
  seed (int): The seed for the random number generator for reproducibility.
498
  randomize_seed (bool): If True, a random seed is used for each image.
499
- aspect_ratio (str): The desired aspect ratio of the output images.
500
  guidance_scale (float): Corresponds to `true_cfg_scale`.
501
  num_inference_steps (int): The number of denoising steps.
502
  prompt_enhance (bool): If True, the prompt is rewritten by an external LLM.
@@ -533,12 +614,17 @@ def infer_page(
533
 
534
  current_seed = seed + i if not randomize_seed else random.randint(0, MAX_SEED)
535
 
536
- # Generate single image
537
- image, used_seed = infer_single(
 
 
 
538
  prompt=prompt,
539
  seed=current_seed,
540
  randomize_seed=False, # We handle randomization here
541
- aspect_ratio=aspect_ratio,
 
 
542
  guidance_scale=guidance_scale,
543
  num_inference_steps=num_inference_steps,
544
  prompt_enhance=prompt_enhance,
@@ -572,12 +658,14 @@ def infer_page(
572
 
573
  return session_state, pdf_path, generated_images[0] if generated_images else None, page_info, button_label
574
 
575
- # Rename the original infer function
576
- def infer_single(
577
  prompt,
578
  seed=42,
579
  randomize_seed=False,
580
- aspect_ratio="1:1",
 
 
581
  guidance_scale=1.0,
582
  num_inference_steps=8,
583
  prompt_enhance=True,
@@ -585,39 +673,20 @@ def infer_single(
585
  custom_style_text="",
586
  ):
587
  """
588
- Generates an image based on a text prompt using the Qwen-Image-Lightning model.
589
-
590
- Args:
591
- prompt (str): The text prompt to generate the image from.
592
- seed (int): The seed for the random number generator for reproducibility.
593
- randomize_seed (bool): If True, a random seed is used.
594
- aspect_ratio (str): The desired aspect ratio of the output image.
595
- guidance_scale (float): Corresponds to `true_cfg_scale`. A higher value
596
- encourages the model to generate images that are more closely related
597
- to the prompt.
598
- num_inference_steps (int): The number of denoising steps.
599
- prompt_enhance (bool): If True, the prompt is rewritten by an external
600
- LLM to add more detail.
601
- style_preset (str): The key of the style preset to apply.
602
- custom_style_text (str): Custom style text when 'no_style' is selected.
603
- progress (gr.Progress): A Gradio Progress object to track the generation
604
- progress in the UI.
605
-
606
- Returns:
607
- tuple[Image.Image, int]: A tuple containing the generated PIL Image and
608
- the integer seed used for the generation.
609
  """
610
  if randomize_seed:
611
  seed = random.randint(0, MAX_SEED)
612
 
613
- # Convert aspect ratio to width and height
614
- width, height = get_image_size(aspect_ratio)
615
 
616
  # Set up the generator for reproducibility
617
  generator = torch.Generator(device="cuda").manual_seed(seed)
618
 
619
  print(f"Original prompt: '{prompt}'")
620
  print(f"Style preset: '{style_preset}'")
 
621
 
622
  # Apply style preset first
623
  styled_prompt, style_negative_prompt = apply_style_preset(prompt, style_preset, custom_style_text)
@@ -646,8 +715,8 @@ def infer_single(
646
 
647
  return image, seed
648
 
649
- # Keep the old infer function for backward compatibility
650
- infer = infer_single
651
 
652
  # --- Examples and UI Layout ---
653
  examples = [
@@ -698,6 +767,46 @@ with gr.Blocks(css=css) as demo:
698
  run_button = gr.Button("Generate page 1", variant="primary")
699
  reset_button = gr.Button("Start New Document", variant="secondary")
700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
701
  with gr.Row():
702
  with gr.Column(scale=1):
703
  result_preview = gr.Image(label="Preview", show_label=True, type="pil")
@@ -719,48 +828,8 @@ with gr.Blocks(css=css) as demo:
719
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
720
 
721
  with gr.Row():
722
- aspect_ratio = gr.Radio(
723
- label="Aspect ratio (width:height)",
724
- choices=["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"],
725
- value="16:9",
726
- )
727
  prompt_enhance = gr.Checkbox(label="Prompt Enhance", value=True)
728
 
729
- with gr.Row():
730
- # Create dropdown choices from loaded presets
731
- style_choices = [(preset["label"], key) for key, preset in STYLE_PRESETS.items()]
732
- style_preset = gr.Dropdown(
733
- label="Style Preset",
734
- choices=style_choices,
735
- value="no_style",
736
- interactive=True
737
- )
738
-
739
- custom_style_text = gr.Textbox(
740
- label="Custom Style Text",
741
- placeholder="Enter custom style keywords (e.g., 'oil painting, impressionist')",
742
- visible=False,
743
- lines=2
744
- )
745
-
746
- with gr.Row():
747
- num_images_slider = gr.Slider(
748
- label="Images per page",
749
- minimum=1,
750
- maximum=4,
751
- step=1,
752
- value=1,
753
- info="Number of images to generate for the PDF"
754
- )
755
-
756
- layout_dropdown = gr.Dropdown(
757
- label="Page Layout",
758
- choices=[("Full Page", "full_page")],
759
- value="full_page",
760
- interactive=True,
761
- info="How images are arranged on the page"
762
- )
763
-
764
  with gr.Row():
765
  guidance_scale = gr.Slider(
766
  label="Guidance scale (True CFG Scale)",
@@ -778,33 +847,35 @@ with gr.Blocks(css=css) as demo:
778
  value=8,
779
  )
780
 
781
- # Add interaction to show/hide custom style text field
782
- def toggle_custom_style(style_value):
783
- return gr.update(visible=(style_value == "no_style"))
784
 
785
- style_preset.change(
786
- fn=toggle_custom_style,
787
- inputs=[style_preset],
788
- outputs=[custom_style_text]
789
- )
790
 
791
- # Update layout dropdown when number of images changes
792
- def update_layout_choices(num_images):
793
- choices = get_layout_choices(int(num_images))
794
- return gr.update(choices=choices, value=choices[0][1] if choices else "default")
795
 
796
- num_images_slider.change(
797
- fn=update_layout_choices,
798
- inputs=[num_images_slider],
799
- outputs=[layout_dropdown]
800
- )
801
 
802
  # Update examples to show some with different styles and image counts
803
  styled_examples = [
804
  ["A capybara wearing a suit holding a sign that reads Hello World", "no_style", "", 1],
805
- ["sharks raining down on san francisco", "flying_saucer", "", 2],
806
- ["A beautiful landscape with mountains and a lake", "klimt", "", 3],
807
  ["A knight fighting a dragon", "medieval", "", 4],
 
 
808
  ]
809
 
810
  gr.Examples(
@@ -823,7 +894,6 @@ with gr.Blocks(css=css) as demo:
823
  prompt,
824
  seed,
825
  randomize_seed,
826
- aspect_ratio,
827
  guidance_scale,
828
  num_inference_steps,
829
  prompt_enhance,
 
68
  key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
69
  if key in PAGE_LAYOUTS:
70
  return [(layout["label"], layout["id"]) for layout in PAGE_LAYOUTS[key]]
71
+ # Fall back to a single "Default" choice if no layouts are found (shouldn't happen with our config)
72
  return [("Default", "default")]
73
 
74
  def get_random_style_preset():
 
253
  # --- UI Constants and Helpers ---
254
  MAX_SEED = np.iinfo(np.int32).max
255
 
256
def get_image_size_for_position(position_data, image_index, num_images):
    """Pick a generation size whose aspect ratio follows a layout panel.

    The longer side is set to 1024 px and the shorter side is derived from
    the panel's width/height ratio, floored to a multiple of 64 and clamped
    to the 256..1024 range. Panels more extreme than 4:1 (or 1:4) therefore
    come out as 1024x256 (or 256x1024).

    Args:
        position_data: Layout position [x, y, width, height] in relative
            units, or a falsy value when no position is known.
        image_index: Index of the current image (0-based). Accepted for
            interface compatibility; not used by the computation.
        num_images: Total number of images in the layout. Accepted for
            interface compatibility; not used by the computation.

    Returns:
        tuple: (width, height) in pixels. Missing, malformed or degenerate
        (non-positive width or height) position data yields 1024x1024.
    """
    max_dim = 1024
    min_dim = 256
    step = 64
    default_size = (max_dim, max_dim)

    if not position_data:
        return default_size

    try:
        _x_rel, _y_rel, w_rel, h_rel = position_data
        degenerate = w_rel <= 0 or h_rel <= 0
    except (TypeError, ValueError):
        # Wrong number of entries or non-numeric values: use the default
        # square instead of failing the whole page generation.
        return default_size

    if degenerate:
        # A zero-sized panel has no meaningful aspect ratio; the previous
        # implementation divided by zero here when the width was 0.
        return default_size

    aspect_ratio = w_rel / h_rel

    # Pin the longer side to max_dim and derive the shorter one.
    if aspect_ratio >= 1:  # Wider than tall
        width = max_dim
        height = int(max_dim / aspect_ratio)
    else:  # Taller than wide
        width = int(max_dim * aspect_ratio)
        height = max_dim

    # Floor (not round) to a multiple of `step` so the size never grows
    # past what the aspect ratio allows.
    width = (width // step) * step
    height = (height // step) * step

    # Clamp both sides; this also covers panels whose short side would
    # otherwise drop below min_dim.
    width = min(max(width, min_dim), max_dim)
    height = min(max(height, min_dim), max_dim)

    return width, height
307
+
308
def get_layout_position_for_image(layout_id, num_images, image_index):
    """Get the position data for a specific image in a layout.

    The configured layouts in PAGE_LAYOUTS are searched first. When the
    layout is unknown, or has no entry for ``image_index``, a built-in
    fallback table keyed by image count is used instead.

    Args:
        layout_id: ID of the selected layout.
        num_images: Total number of images on the page.
        image_index: Index of the current image (0-based).

    Returns:
        Position data [x, y, width, height] in relative units. A value is
        always returned; the last resort is a single full-page panel.
    """
    key = f"{num_images}_image" if num_images == 1 else f"{num_images}_images"
    layouts = PAGE_LAYOUTS.get(key, [])
    layout = next((entry for entry in layouts if entry["id"] == layout_id), None)

    if layout and "positions" in layout:
        positions = layout["positions"]
        # Require a non-negative index so -1 does not silently wrap around
        # to the last panel.
        if 0 <= image_index < len(positions):
            return positions[image_index]

    # Fallback positions for each number of images. These mirror the
    # fallback table used when the PDF page is drawn; the 3-image row in
    # particular must use the same 0.283 x 0.9 cells, otherwise images are
    # generated for one aspect ratio and stretched into another.
    fallback_positions = {
        1: [[0.05, 0.05, 0.9, 0.9]],
        2: [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]],
        3: [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]],
        4: [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
            [0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]],
        5: [[0.05, 0.05, 0.9, 0.3], [0.05, 0.4, 0.283, 0.55], [0.358, 0.4, 0.283, 0.55],
            [0.666, 0.4, 0.283, 0.275], [0.666, 0.7, 0.283, 0.275]],
        6: [[0.05, 0.05, 0.425, 0.283], [0.525, 0.05, 0.425, 0.283],
            [0.05, 0.358, 0.425, 0.283], [0.525, 0.358, 0.425, 0.283],
            [0.05, 0.666, 0.425, 0.283], [0.525, 0.666, 0.425, 0.283]],
    }

    positions = fallback_positions.get(num_images, fallback_positions[1])
    if 0 <= image_index < len(positions):
        return positions[image_index]
    return [0.05, 0.05, 0.9, 0.9]  # Ultimate default
346
 
347
  # --- Session Management Functions ---
348
 
 
470
  positions = [[0.05, 0.05, 0.425, 0.9], [0.525, 0.05, 0.425, 0.9]]
471
  elif num_images == 3:
472
  positions = [[0.05, 0.05, 0.283, 0.9], [0.358, 0.05, 0.283, 0.9], [0.666, 0.05, 0.283, 0.9]]
473
+ elif num_images == 4:
474
  positions = [[0.05, 0.05, 0.425, 0.425], [0.525, 0.05, 0.425, 0.425],
475
  [0.05, 0.525, 0.425, 0.425], [0.525, 0.525, 0.425, 0.425]]
476
+ elif num_images == 5:
477
+ positions = [[0.05, 0.05, 0.9, 0.3], [0.05, 0.4, 0.283, 0.55], [0.358, 0.4, 0.283, 0.55],
478
+ [0.666, 0.4, 0.283, 0.275], [0.666, 0.7, 0.283, 0.275]]
479
+ elif num_images == 6:
480
+ positions = [[0.05, 0.05, 0.425, 0.283], [0.525, 0.05, 0.425, 0.283],
481
+ [0.05, 0.358, 0.425, 0.283], [0.525, 0.358, 0.425, 0.283],
482
+ [0.05, 0.666, 0.425, 0.283], [0.525, 0.666, 0.425, 0.283]]
483
+ else:
484
+ # For more than 6 images, fall back to a single full-page position
485
+ positions = [[0.05, 0.05, 0.9, 0.9]]
486
  else:
487
  positions = layout["positions"]
488
 
 
506
  image.save(img_buffer, format='JPEG', quality=95)
507
  img_buffer.seek(0)
508
 
509
+ # Draw the image on the PDF - fill the entire allocated space
510
+ # preserveAspectRatio=False to fill the entire area
511
+ pdf.drawImage(ImageReader(img_buffer), x, y, width=width, height=height, preserveAspectRatio=False, mask='auto')
512
 
513
  # Save the PDF
514
  pdf.save()
 
556
  return str(pdf_path)
557
 
558
  # --- Main Inference Function (with session support) ---
559
+ @spaces.GPU(duration=180) # Increased duration for up to 6 images
560
  def infer_page(
561
  prompt,
562
  seed=42,
563
  randomize_seed=False,
 
564
  guidance_scale=1.0,
565
  num_inference_steps=8,
566
  prompt_enhance=True,
 
578
  prompt (str): The text prompt to generate images from.
579
  seed (int): The seed for the random number generator for reproducibility.
580
  randomize_seed (bool): If True, a random seed is used for each image.
 
581
  guidance_scale (float): Corresponds to `true_cfg_scale`.
582
  num_inference_steps (int): The number of denoising steps.
583
  prompt_enhance (bool): If True, the prompt is rewritten by an external LLM.
 
614
 
615
  current_seed = seed + i if not randomize_seed else random.randint(0, MAX_SEED)
616
 
617
+ # Get optimal aspect ratio based on position in layout
618
+ position_data = get_layout_position_for_image(layout, int(num_images), i)
619
+
620
+ # Generate single image with automatic aspect ratio
621
+ image, used_seed = infer_single_auto(
622
  prompt=prompt,
623
  seed=current_seed,
624
  randomize_seed=False, # We handle randomization here
625
+ position_data=position_data,
626
+ image_index=i,
627
+ num_images=int(num_images),
628
  guidance_scale=guidance_scale,
629
  num_inference_steps=num_inference_steps,
630
  prompt_enhance=prompt_enhance,
 
658
 
659
  return session_state, pdf_path, generated_images[0] if generated_images else None, page_info, button_label
660
 
661
+ # New inference function with automatic aspect ratio
662
+ def infer_single_auto(
663
  prompt,
664
  seed=42,
665
  randomize_seed=False,
666
+ position_data=None,
667
+ image_index=0,
668
+ num_images=1,
669
  guidance_scale=1.0,
670
  num_inference_steps=8,
671
  prompt_enhance=True,
 
673
  custom_style_text="",
674
  ):
675
  """
676
+ Generates an image with automatically determined aspect ratio based on layout position.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  """
678
  if randomize_seed:
679
  seed = random.randint(0, MAX_SEED)
680
 
681
+ # Automatically determine image size based on position
682
+ width, height = get_image_size_for_position(position_data, image_index, num_images)
683
 
684
  # Set up the generator for reproducibility
685
  generator = torch.Generator(device="cuda").manual_seed(seed)
686
 
687
  print(f"Original prompt: '{prompt}'")
688
  print(f"Style preset: '{style_preset}'")
689
+ print(f"Auto-selected size based on layout: {width}x{height}")
690
 
691
  # Apply style preset first
692
  styled_prompt, style_negative_prompt = apply_style_preset(prompt, style_preset, custom_style_text)
 
715
 
716
  return image, seed
717
 
718
+ # Keep the old `infer` name as an alias of infer_single_auto (note: it no longer accepts `aspect_ratio`)
719
+ infer = infer_single_auto
720
 
721
  # --- Examples and UI Layout ---
722
  examples = [
 
767
  run_button = gr.Button("Generate page 1", variant="primary")
768
  reset_button = gr.Button("Start New Document", variant="secondary")
769
 
770
+ # New row for Style Preset and Page Layout
771
+ with gr.Row():
772
+ with gr.Column(scale=1):
773
+ # Number of images slider (affects layout choices)
774
+ num_images_slider = gr.Slider(
775
+ label="Images per page",
776
+ minimum=1,
777
+ maximum=6,
778
+ step=1,
779
+ value=1,
780
+ info="Number of images to generate for the PDF (1-6)"
781
+ )
782
+
783
+ with gr.Column(scale=2):
784
+ layout_dropdown = gr.Dropdown(
785
+ label="Page Layout",
786
+ choices=[("Full Page", "full_page")],
787
+ value="full_page",
788
+ interactive=True,
789
+ info="How images are arranged on the page"
790
+ )
791
+
792
+ with gr.Column(scale=2):
793
+ # Create dropdown choices from loaded presets
794
+ style_choices = [(preset["label"], key) for key, preset in STYLE_PRESETS.items()]
795
+ style_preset = gr.Dropdown(
796
+ label="Style Preset",
797
+ choices=style_choices,
798
+ value="no_style",
799
+ interactive=True
800
+ )
801
+
802
+ with gr.Column(scale=2):
803
+ custom_style_text = gr.Textbox(
804
+ label="Custom Style Text",
805
+ placeholder="Enter custom style (e.g., 'oil painting')",
806
+ visible=False,
807
+ lines=1
808
+ )
809
+
810
  with gr.Row():
811
  with gr.Column(scale=1):
812
  result_preview = gr.Image(label="Preview", show_label=True, type="pil")
 
828
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
829
 
830
  with gr.Row():
 
 
 
 
 
831
  prompt_enhance = gr.Checkbox(label="Prompt Enhance", value=True)
832
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
  with gr.Row():
834
  guidance_scale = gr.Slider(
835
  label="Guidance scale (True CFG Scale)",
 
847
  value=8,
848
  )
849
 
850
+ # Add interaction to show/hide custom style text field
851
def toggle_custom_style(style_value):
    """Show the custom style textbox only while the "no_style" preset is selected."""
    show_custom_text = style_value == "no_style"
    return gr.update(visible=show_custom_text)
853
 
854
+ style_preset.change(
855
+ fn=toggle_custom_style,
856
+ inputs=[style_preset],
857
+ outputs=[custom_style_text]
858
+ )
859
 
860
+ # Update layout dropdown when number of images changes
861
def update_layout_choices(num_images):
    """Rebuild the layout dropdown for the given image count and select its first entry."""
    layout_options = get_layout_choices(int(num_images))
    if layout_options:
        # Each option is a (label, id) pair; the dropdown value is the id.
        selected = layout_options[0][1]
    else:
        selected = "default"
    return gr.update(choices=layout_options, value=selected)
864
 
865
+ num_images_slider.change(
866
+ fn=update_layout_choices,
867
+ inputs=[num_images_slider],
868
+ outputs=[layout_dropdown]
869
+ )
870
 
871
  # Update examples to show some with different styles and image counts
872
  styled_examples = [
873
  ["A capybara wearing a suit holding a sign that reads Hello World", "no_style", "", 1],
874
+ ["sharks raining down on san francisco", "anime", "", 2],
875
+ ["A beautiful landscape with mountains and a lake", "watercolor", "", 3],
876
  ["A knight fighting a dragon", "medieval", "", 4],
877
+ ["Space battle with laser beams", "sci-fi", "", 5],
878
+ ["Detective investigating a mystery", "noir", "", 6],
879
  ]
880
 
881
  gr.Examples(
 
894
  prompt,
895
  seed,
896
  randomize_seed,
 
897
  guidance_scale,
898
  num_inference_steps,
899
  prompt_enhance,
page_layouts.yaml CHANGED
@@ -124,4 +124,122 @@ layouts:
124
  - [0.05, 0.05, 0.425, 0.425] # Top left (large)
125
  - [0.525, 0.05, 0.425, 0.425] # Top right (large)
126
  - [0.05, 0.525, 0.425, 0.425] # Bottom left
127
- - [0.525, 0.7, 0.425, 0.25] # Bottom right (small)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  - [0.05, 0.05, 0.425, 0.425] # Top left (large)
125
  - [0.525, 0.05, 0.425, 0.425] # Top right (large)
126
  - [0.05, 0.525, 0.425, 0.425] # Bottom left
127
+ - [0.525, 0.7, 0.425, 0.25] # Bottom right (small)
128
+
129
+ 5_images:
130
+ - id: "us_comic_action"
131
+ label: "US Comic - Action Scene"
132
+ description: "Classic American superhero comic layout with large establishing shot"
133
+ positions:
134
+ - [0.05, 0.05, 0.9, 0.4] # Wide establishing shot (panoramic)
135
+ - [0.05, 0.5, 0.283, 0.45] # Action panel 1
136
+ - [0.358, 0.5, 0.283, 0.45] # Action panel 2
137
+ - [0.666, 0.5, 0.283, 0.225] # Close-up 1
138
+ - [0.666, 0.75, 0.283, 0.225] # Close-up 2
139
+
140
+ - id: "manga_vertical_flow"
141
+ label: "Manga - Vertical Flow"
142
+ description: "Japanese manga style with vertical reading flow"
143
+ positions:
144
+ - [0.525, 0.05, 0.425, 0.35] # Top right (read first in manga)
145
+ - [0.05, 0.05, 0.425, 0.35] # Top left
146
+ - [0.525, 0.45, 0.425, 0.25] # Middle right
147
+ - [0.05, 0.45, 0.425, 0.25] # Middle left
148
+ - [0.05, 0.75, 0.9, 0.2] # Bottom wide panel
149
+
150
+ - id: "euro_bd_grid"
151
+ label: "European BD - Clear Grid"
152
+ description: "Franco-Belgian clear line style with regular panels"
153
+ positions:
154
+ - [0.05, 0.05, 0.425, 0.283] # Row 1 left
155
+ - [0.525, 0.05, 0.425, 0.283] # Row 1 right
156
+ - [0.05, 0.358, 0.9, 0.283] # Row 2 wide
157
+ - [0.05, 0.666, 0.425, 0.283] # Row 3 left
158
+ - [0.525, 0.666, 0.425, 0.283] # Row 3 right
159
+
160
+ - id: "diagonal_dynamic"
161
+ label: "Dynamic Diagonal"
162
+ description: "Action-oriented diagonal composition"
163
+ positions:
164
+ - [0.05, 0.05, 0.5, 0.4] # Large top left
165
+ - [0.6, 0.05, 0.35, 0.25] # Small top right
166
+ - [0.3, 0.35, 0.4, 0.3] # Center focus
167
+ - [0.05, 0.7, 0.35, 0.25] # Bottom left
168
+ - [0.6, 0.7, 0.35, 0.25] # Bottom right
169
+
170
+ - id: "spiral_focus"
171
+ label: "Spiral Focus"
172
+ description: "Panels arranged in a spiral leading to center"
173
+ positions:
174
+ - [0.05, 0.05, 0.35, 0.35] # Top left
175
+ - [0.425, 0.05, 0.525, 0.25] # Top wide
176
+ - [0.7, 0.35, 0.25, 0.6] # Right tall
177
+ - [0.425, 0.7, 0.525, 0.25] # Bottom wide
178
+ - [0.25, 0.35, 0.4, 0.3] # Center focus
179
+
180
+ 6_images:
181
+ - id: "classic_comic_grid"
182
+ label: "Classic Comic Grid"
183
+ description: "Traditional 2x3 American comic book grid"
184
+ positions:
185
+ - [0.05, 0.05, 0.425, 0.283] # Row 1 left
186
+ - [0.525, 0.05, 0.425, 0.283] # Row 1 right
187
+ - [0.05, 0.358, 0.425, 0.283] # Row 2 left
188
+ - [0.525, 0.358, 0.425, 0.283] # Row 2 right
189
+ - [0.05, 0.666, 0.425, 0.283] # Row 3 left
190
+ - [0.525, 0.666, 0.425, 0.283] # Row 3 right
191
+
192
+ - id: "manga_4koma"
193
+ label: "Manga - 4-Koma Plus"
194
+ description: "Japanese 4-panel strip with header and footer"
195
+ positions:
196
+ - [0.05, 0.05, 0.9, 0.15] # Header panel
197
+ - [0.05, 0.25, 0.425, 0.2] # Strip 1
198
+ - [0.525, 0.25, 0.425, 0.2] # Strip 2
199
+ - [0.05, 0.5, 0.425, 0.2] # Strip 3
200
+ - [0.525, 0.5, 0.425, 0.2] # Strip 4
201
+ - [0.05, 0.75, 0.9, 0.2] # Footer/punchline
202
+
203
+ - id: "euro_bd_cinematic"
204
+ label: "European BD - Cinematic"
205
+ description: "Cinematic European style with varied panel sizes"
206
+ positions:
207
+ - [0.05, 0.05, 0.9, 0.25] # Wide establishing
208
+ - [0.05, 0.35, 0.283, 0.25] # Small 1
209
+ - [0.358, 0.35, 0.283, 0.25] # Small 2
210
+ - [0.666, 0.35, 0.283, 0.25] # Small 3
211
+ - [0.05, 0.65, 0.425, 0.3] # Medium left
212
+ - [0.525, 0.65, 0.425, 0.3] # Medium right
213
+
214
+ - id: "action_sequence"
215
+ label: "Action Sequence"
216
+ description: "Fast-paced action scene layout"
217
+ positions:
218
+ - [0.05, 0.05, 0.6, 0.35] # Large action shot
219
+ - [0.7, 0.05, 0.25, 0.175] # Speed line 1
220
+ - [0.7, 0.25, 0.25, 0.175] # Speed line 2
221
+ - [0.05, 0.45, 0.283, 0.5] # Vertical impact 1
222
+ - [0.358, 0.45, 0.283, 0.5] # Vertical impact 2
223
+ - [0.666, 0.45, 0.283, 0.5] # Vertical impact 3
224
+
225
+ - id: "storytelling_flow"
226
+ label: "Storytelling Flow"
227
+ description: "Natural reading flow for narrative scenes"
228
+ positions:
229
+ - [0.05, 0.05, 0.425, 0.25] # Scene 1
230
+ - [0.525, 0.05, 0.425, 0.25] # Scene 2
231
+ - [0.05, 0.35, 0.9, 0.2] # Wide transition
232
+ - [0.05, 0.6, 0.425, 0.35] # Scene 3
233
+ - [0.525, 0.6, 0.425, 0.175] # Scene 4a
234
+ - [0.525, 0.8, 0.425, 0.175] # Scene 4b
235
+
236
+ - id: "focus_surround"
237
+ label: "Focus with Details"
238
+ description: "Central focus with surrounding detail panels"
239
+ positions:
240
+ - [0.25, 0.25, 0.5, 0.5] # Large center focus
241
+ - [0.05, 0.05, 0.35, 0.15] # Top left detail
242
+ - [0.6, 0.05, 0.35, 0.15] # Top right detail
243
+ - [0.05, 0.8, 0.35, 0.15] # Bottom left detail
244
+ - [0.6, 0.8, 0.35, 0.15] # Bottom right detail
245
+ - [0.05, 0.4, 0.15, 0.3] # Left side detail