FLUX.1-Kontext-portrait

Running on Zero

App Files Files Community

linoyts HF Staff committed on Jun 23

Commit

afe56ea

verified ·

1 Parent(s): 56732fb

Update kontext_pipeline.py

Browse files

Files changed (1) hide show

kontext_pipeline.py +45 -19

kontext_pipeline.py CHANGED Viewed

@@ -1,3 +1,17 @@
 import inspect
 from typing import Any, Callable, Dict, List, Optional, Union
@@ -13,12 +27,7 @@ from transformers import (
 )
 from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
-from diffusers.loaders import (
-    FluxIPAdapterMixin,
-    FluxLoraLoaderMixin,
-    FromSingleFileMixin,
-    TextualInversionLoaderMixin,
-)
 from diffusers.models import AutoencoderKL, FluxTransformer2DModel
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
@@ -29,14 +38,11 @@ from diffusers.utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
 from diffusers.utils.torch_utils import randn_tensor
-from diffusers import DiffusionPipeline
 from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
@@ -50,11 +56,27 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
-        # TODO
         ```
 """
 PREFERRED_KONTEXT_RESOLUTIONS = [
     (672, 1568),
     (688, 1504),
@@ -718,6 +740,7 @@ class FluxKontextPipeline(
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         max_sequence_length: int = 512,
         max_area: int = 1024**2,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -915,13 +938,16 @@ class FluxKontextPipeline(
         # 3. Preprocess image
         if not torch.is_tensor(image) or image.size(1) == self.latent_channels:
-            image_width, image_height = self.image_processor.get_default_height_width(image)
             aspect_ratio = image_width / image_height
-            # Kontext is trained on specific resolutions, using one of them is recommended
-            _, image_width, image_height = min(
-                (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_KONTEXT_RESOLUTIONS
-            )
             image_width = image_width // multiple_of * multiple_of
             image_height = image_height // multiple_of * multiple_of
             image = self.image_processor.resize(image, image_height, image_width)
@@ -1085,4 +1111,4 @@ class FluxKontextPipeline(
         if not return_dict:
             return (image,)
-        return FluxPipelineOutput(images=image)

+# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import inspect
 from typing import Any, Callable, Dict, List, Optional, Union
 )
 from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
+from diffusers.loaders import FluxIPAdapterMixin, FluxLoraLoaderMixin, FromSingleFileMixin, TextualInversionLoaderMixin
 from diffusers.models import AutoencoderKL, FluxTransformer2DModel
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
 from diffusers.utils.torch_utils import randn_tensor
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
+        >>> import torch
+        >>> from diffusers import FluxKontextPipeline
+        >>> from diffusers.utils import load_image
+        >>> pipe = FluxKontextPipeline.from_pretrained(
+        ...     "black-forest-labs/FLUX.1-kontext", transformer=transformer, torch_dtype=torch.bfloat16
+        ... )
+        >>> pipe.to("cuda")
+        >>> image = load_image("inputs/yarn-art-pikachu.png").convert("RGB")
+        >>> prompt = "Make Pikachu hold a sign that says 'Hugging Face is awesome', yarn art style, detailed, vibrant colors"
+        >>> image = pipe(
+        ...     image=image,
+        ...     prompt=prompt,
+        ...     guidance_scale=2.5,
+        ...     generator=torch.Generator().manual_seed(42),
+        ... ).images[0]
+        >>> image.save("output.png")
         ```
 """
 PREFERRED_KONTEXT_RESOLUTIONS = [
     (672, 1568),
     (688, 1504),
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         max_sequence_length: int = 512,
         max_area: int = 1024**2,
+        _auto_resize: bool = True,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
         # 3. Preprocess image
         if not torch.is_tensor(image) or image.size(1) == self.latent_channels:
+            if isinstance(image, list):
+                image_width, image_height = self.image_processor.get_default_height_width(image[0])
+            else:
+                image_width, image_height = self.image_processor.get_default_height_width(image)
             aspect_ratio = image_width / image_height
+            if _auto_resize:
+                # Kontext is trained on specific resolutions, using one of them is recommended
+                _, image_width, image_height = min(
+                    (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_KONTEXT_RESOLUTIONS
+                )
             image_width = image_width // multiple_of * multiple_of
             image_height = image_height // multiple_of * multiple_of
             image = self.image_processor.resize(image, image_height, image_width)
         if not return_dict:
             return (image,)
+        return FluxPipelineOutput(images=image)