App update
- app.py +109 -9
- app_settings.py +23 -0
- backend/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/__pycache__/image_saver.cpython-311.pyc +0 -0
- backend/__pycache__/lcm_text_to_image.cpython-311.pyc +0 -0
- backend/lcm_text_to_image.py +256 -44
- backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_ov_pipeline.cpython-311.pyc +0 -0
- backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_scheduler.cpython-311.pyc +0 -0
- backend/lcmdiffusion/pipelines/openvino/lcm_ov_pipeline.py +86 -29
- backend/lcmdiffusion/pipelines/openvino/lcm_scheduler.py +67 -20
- backend/models/__pycache__/lcmdiffusion_setting.cpython-311.pyc +0 -0
- backend/models/lcmdiffusion_setting.py +15 -5
- constants.py +8 -2
- context.py +13 -8
- frontend/__pycache__/utils.cpython-311.pyc +0 -0
- frontend/gui/__pycache__/app_window.cpython-311.pyc +0 -0
- frontend/gui/__pycache__/image_generator_worker.cpython-311.pyc +0 -0
- frontend/gui/__pycache__/ui.cpython-311.pyc +0 -0
- frontend/gui/app_window.py +177 -22
- frontend/utils.py +22 -4
- frontend/webui/text_to_image_ui.py +41 -49
- frontend/webui/ui.py +2 -2
- models/__pycache__/interface_types.cpython-311.pyc +0 -0
- models/__pycache__/settings.cpython-311.pyc +0 -0
- models/settings.py +2 -2
- paths.py +11 -2
- utils.py +11 -0
app.py
CHANGED
@@ -6,7 +6,7 @@ from context import Context
 from constants import APP_VERSION, LCM_DEFAULT_MODEL_OPENVINO
 from models.interface_types import InterfaceType
 from constants import DEVICE
-
+
 parser = ArgumentParser(description=f"FAST SD CPU {constants.APP_VERSION}")
 parser.add_argument(
     "-s",
@@ -28,6 +28,12 @@ group.add_argument(
     action="store_true",
     help="Start Web UI",
 )
+group.add_argument(
+    "-r",
+    "--realtime",
+    action="store_true",
+    help="Start realtime inference UI(experimental)",
+)
 group.add_argument(
     "-v",
     "--version",
@@ -66,8 +72,8 @@ parser.add_argument(
 parser.add_argument(
     "--guidance_scale",
     type=int,
+    help="Guidance scale,default : 1.0",
+    default=1.0,
 )

 parser.add_argument(
@@ -98,28 +104,122 @@ parser.add_argument(
     action="store_false",
     help="Use safety checker",
 )
+parser.add_argument(
+    "--use_lcm_lora",
+    action="store_true",
+    help="Use LCM-LoRA",
+)
+parser.add_argument(
+    "--base_model_id",
+    type=str,
+    help="LCM LoRA base model ID,Default Lykon/dreamshaper-8",
+    default="Lykon/dreamshaper-8",
+)
+parser.add_argument(
+    "--lcm_lora_id",
+    type=str,
+    help="LCM LoRA model ID,Default latent-consistency/lcm-lora-sdv1-5",
+    default="latent-consistency/lcm-lora-sdv1-5",
+)
 parser.add_argument(
     "-i",
     "--interactive",
     action="store_true",
     help="Interactive CLI mode",
 )
+parser.add_argument(
+    "--use_tiny_auto_encoder",
+    action="store_true",
+    help="Use tiny auto encoder for SD (TAESD)",
+)
 args = parser.parse_args()

 if args.version:
     print(APP_VERSION)
     exit()

-parser.print_help()
+# parser.print_help()
 show_system_info()
 print(f"Using device : {constants.DEVICE}")
 app_settings = AppSettings()
 app_settings.load()
+print(
+    f"Found {len(app_settings.stable_diffsuion_models)} stable diffusion models in config/stable-diffusion-models.txt"
+)
+print(
+    f"Found {len(app_settings.lcm_lora_models)} LCM-LoRA models in config/lcm-lora-models.txt"
+)
+print(
+    f"Found {len(app_settings.openvino_lcm_models)} OpenVINO LCM models in config/openvino-lcm-models.txt"
+)
+if args.gui:
+    from frontend.gui.ui import start_gui
+
+    print("Starting desktop GUI mode(Qt)")
+    start_gui(
+        [],
+        app_settings,
+    )
+elif args.webui:
+    from frontend.webui.ui import start_webui
+
+    print("Starting web UI mode")
+    start_webui(
         app_settings,
+        args.share,
+    )
+elif args.realtime:
+    from frontend.webui.realtime_ui import start_realtime_text_to_image
+
+    print("Starting realtime text to image(EXPERIMENTAL)")
+    start_realtime_text_to_image(args.share)
+else:
+    context = Context(InterfaceType.CLI)
+    config = app_settings.settings
+
+    if args.use_openvino:
+        config.lcm_diffusion_setting.lcm_model_id = LCM_DEFAULT_MODEL_OPENVINO
+    else:
+        config.lcm_diffusion_setting.lcm_model_id = args.lcm_model_id
+
+    config.lcm_diffusion_setting.prompt = args.prompt
+    config.lcm_diffusion_setting.image_height = args.image_height
+    config.lcm_diffusion_setting.image_width = args.image_width
+    config.lcm_diffusion_setting.guidance_scale = args.guidance_scale
+    config.lcm_diffusion_setting.number_of_images = args.number_of_images
+    config.lcm_diffusion_setting.seed = args.seed
+    config.lcm_diffusion_setting.use_openvino = args.use_openvino
+    config.lcm_diffusion_setting.use_tiny_auto_encoder = args.use_tiny_auto_encoder
+    config.lcm_diffusion_setting.use_lcm_lora = args.use_lcm_lora
+    config.lcm_diffusion_setting.lcm_lora.base_model_id = args.base_model_id
+    config.lcm_diffusion_setting.lcm_lora.lcm_lora_id = args.lcm_lora_id
+
+    if args.seed > -1:
+        config.lcm_diffusion_setting.use_seed = True
+    else:
+        config.lcm_diffusion_setting.use_seed = False
+    config.lcm_diffusion_setting.use_offline_model = args.use_offline_model
+    config.lcm_diffusion_setting.use_safety_checker = args.use_safety_checker
+
+    if args.interactive:
+        while True:
+            user_input = input(">>")
+            if user_input == "exit":
+                break
+            config.lcm_diffusion_setting.prompt = user_input
+            context.generate_text_to_image(
+                settings=config,
+                device=DEVICE,
+            )
+
+    else:
+        context.generate_text_to_image(
+            settings=config,
+            device=DEVICE,
+        )
+
+
+from frontend.webui.hf_demo import start_demo_text_to_image
+
+print("Starting demo text to image")
+start_demo_text_to_image(True)
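The new options above are ordinary argparse flags, so a run that exercises the LCM-LoRA and TAESD paths might look like the following; the --prompt flag belongs to the unchanged part of the parser, so its exact spelling here is an assumption:

    python app.py --use_lcm_lora --base_model_id Lykon/dreamshaper-8 --lcm_lora_id latent-consistency/lcm-lora-sdv1-5 --use_tiny_auto_encoder --prompt "a cup of coffee on a wooden table"

The new -r/--realtime switch instead launches the experimental realtime web UI via start_realtime_text_to_image() and bypasses the CLI generation branch.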
app_settings.py
CHANGED
@@ -2,16 +2,39 @@ import yaml
 from os import path, makedirs
 from models.settings import Settings
 from paths import FastStableDiffusionPaths
+from utils import get_models_from_text_file
+from constants import OPENVINO_LCM_MODELS_FILE, LCM_LORA_MODELS_FILE, SD_MODELS_FILE


 class AppSettings:
     def __init__(self):
         self.config_path = FastStableDiffusionPaths().get_app_settings_path()
+        self._stable_diffsuion_models = get_models_from_text_file(
+            FastStableDiffusionPaths().get_models_config_path(SD_MODELS_FILE)
+        )
+        self._lcm_lora_models = get_models_from_text_file(
+            FastStableDiffusionPaths().get_models_config_path(LCM_LORA_MODELS_FILE)
+        )
+        self._openvino_lcm_models = get_models_from_text_file(
+            FastStableDiffusionPaths().get_models_config_path(OPENVINO_LCM_MODELS_FILE)
+        )

     @property
     def settings(self):
         return self._config

+    @property
+    def stable_diffsuion_models(self):
+        return self._stable_diffsuion_models
+
+    @property
+    def openvino_lcm_models(self):
+        return self._openvino_lcm_models
+
+    @property
+    def lcm_lora_models(self):
+        return self._lcm_lora_models
+
     def load(self):
         if not path.exists(self.config_path):
             base_dir = path.dirname(self.config_path)
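utils.py is listed above as changed (+11 -0) but its contents are not shown on this page. A minimal sketch of what the get_models_from_text_file helper used in the constructor plausibly does, purely as an assumption, is:

# Hypothetical sketch only - utils.py is not shown in this diff view.
# Assumption: the helper returns one model id per non-blank line of a config text file.
from os import path


def get_models_from_text_file(file_path: str) -> list:
    models = []
    if path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as config_file:
            # keep each non-empty, stripped line as a model id
            models = [line.strip() for line in config_file if line.strip()]
    return models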
backend/__pycache__/__init__.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/__init__.cpython-311.pyc and b/backend/__pycache__/__init__.cpython-311.pyc differ
backend/__pycache__/image_saver.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/image_saver.cpython-311.pyc and b/backend/__pycache__/image_saver.cpython-311.pyc differ
backend/__pycache__/lcm_text_to_image.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/lcm_text_to_image.cpython-311.pyc and b/backend/__pycache__/lcm_text_to_image.cpython-311.pyc differ
backend/lcm_text_to_image.py
CHANGED
@@ -1,20 +1,53 @@
 from typing import Any
+from diffusers import (
+    DiffusionPipeline,
+    AutoencoderTiny,
+    LCMScheduler,
+    UNet2DConditionModel,
+)
 from os import path
 import torch
 from backend.models.lcmdiffusion_setting import LCMDiffusionSetting
 import numpy as np
+from constants import (
+    DEVICE,
+    LCM_DEFAULT_MODEL,
+    TAESD_MODEL,
+    TAESDXL_MODEL,
+    TAESD_MODEL_OPENVINO,
+)
+from huggingface_hub import model_info
+from backend.models.lcmdiffusion_setting import LCMLora
+from backend.device import is_openvino_device

+if is_openvino_device():
+    from huggingface_hub import snapshot_download
+    from optimum.intel.openvino.modeling_diffusion import OVModelVaeDecoder, OVBaseModel

+    # from optimum.intel.openvino.modeling_diffusion import OVStableDiffusionPipeline
     from backend.lcmdiffusion.pipelines.openvino.lcm_ov_pipeline import (
+        OVStableDiffusionPipeline,
     )
     from backend.lcmdiffusion.pipelines.openvino.lcm_scheduler import (
-        LCMScheduler,
+        LCMScheduler as OpenVinoLCMscheduler,
     )

+class CustomOVModelVaeDecoder(OVModelVaeDecoder):
+    def __init__(
+        self,
+        model,
+        parent_model,
+        ov_config=None,
+        model_dir=None,
+    ):
+        super(OVModelVaeDecoder, self).__init__(
+            model,
+            parent_model,
+            ov_config,
+            "vae_decoder",
+            model_dir,
+        )
+

 class LCMTextToImage:
     def __init__(
@@ -23,18 +56,125 @@ class LCMTextToImage:
     ) -> None:
         self.pipeline = None
         self.use_openvino = False
+        self.device = ""
         self.previous_model_id = None
+        self.previous_use_tae_sd = False
+        self.previous_use_lcm_lora = False
+        self.torch_data_type = (
+            torch.float32 if is_openvino_device() or DEVICE == "mps" else torch.float16
+        )
+        print(f"Torch datatype : {self.torch_data_type}")
+
+    def _get_lcm_pipeline(
+        self,
+        lcm_model_id: str,
+        base_model_id: str,
+        use_local_model: bool,
+    ):
+        pipeline = None
+        unet = UNet2DConditionModel.from_pretrained(
+            lcm_model_id,
+            torch_dtype=torch.float32,
+            local_files_only=use_local_model
+            # resume_download=True,
+        )
+        pipeline = DiffusionPipeline.from_pretrained(
+            base_model_id,
+            unet=unet,
+            torch_dtype=torch.float32,
+            local_files_only=use_local_model
+            # resume_download=True,
+        )
+        pipeline.scheduler = LCMScheduler.from_config(pipeline.scheduler.config)
+        return pipeline
+
+    def get_tiny_decoder_vae_model(self) -> str:
+        pipeline_class = self.pipeline.__class__.__name__
+        print(f"Pipeline class : {pipeline_class}")
+        if (
+            pipeline_class == "LatentConsistencyModelPipeline"
+            or pipeline_class == "StableDiffusionPipeline"
+        ):
+            return TAESD_MODEL
+        elif pipeline_class == "StableDiffusionXLPipeline":
+            return TAESDXL_MODEL
+        elif pipeline_class == "OVStableDiffusionPipeline":
+            return TAESD_MODEL_OPENVINO
+
+    def _get_lcm_model_pipeline(
+        self,
+        model_id: str,
+        use_local_model,
+    ):
+        pipeline = None
+        if model_id == LCM_DEFAULT_MODEL:
+            pipeline = DiffusionPipeline.from_pretrained(
+                model_id,
+                local_files_only=use_local_model,
+            )
+        elif model_id == "latent-consistency/lcm-sdxl":
+            pipeline = self._get_lcm_pipeline(
+                model_id,
+                "stabilityai/stable-diffusion-xl-base-1.0",
+                use_local_model,
+            )
+        elif model_id == "latent-consistency/lcm-ssd-1b":
+            pipeline = self._get_lcm_pipeline(
+                model_id,
+                "segmind/SSD-1B",
+                use_local_model,
+            )
+        return pipeline
+
+    def _get_lcm_lora_pipeline(
+        self,
+        base_model_id: str,
+        lcm_lora_id: str,
+        use_local_model: bool,
+    ):
+        pipeline = DiffusionPipeline.from_pretrained(
+            base_model_id,
+            torch_dtype=self.torch_data_type,
+            local_files_only=use_local_model,
+        )
+        pipeline.load_lora_weights(
+            lcm_lora_id,
+            local_files_only=use_local_model,
+        )
+
+        pipeline.scheduler = LCMScheduler.from_config(pipeline.scheduler.config)
+
+        pipeline.fuse_lora()
+        pipeline.unet.to(memory_format=torch.channels_last)
+        return pipeline
+
+    def _pipeline_to_device(self):
+        print(f"Pipeline device : {DEVICE}")
+        print(f"Pipeline dtype : {self.torch_data_type}")
+        self.pipeline.to(
+            torch_device=DEVICE,
+            torch_dtype=self.torch_data_type,
+        )
+
+    def _add_freeu(self):
+        pipeline_class = self.pipeline.__class__.__name__
+        if pipeline_class == "StableDiffusionPipeline":
+            print("Add FreeU - SD")
+            self.pipeline.enable_freeu(
+                s1=0.9,
+                s2=0.2,
+                b1=1.2,
+                b2=1.4,
+            )
+        elif pipeline_class == "StableDiffusionXLPipeline":
+            print("Add FreeU - SDXL")
+            self.pipeline.enable_freeu(
+                s1=0.6,
+                s2=0.4,
+                b1=1.1,
+                b2=1.2,
+            )

     def init(
         self,
@@ -42,44 +182,97 @@ class LCMTextToImage:
         use_openvino: bool = False,
         device: str = "cpu",
         use_local_model: bool = False,
+        use_tiny_auto_encoder: bool = False,
+        use_lora: bool = False,
+        lcm_lora: LCMLora = LCMLora(),
     ) -> None:
         self.device = device
         self.use_openvino = use_openvino
+        if (
+            self.pipeline is None
+            or self.previous_model_id != model_id
+            or self.previous_use_tae_sd != use_tiny_auto_encoder
+            or self.previous_lcm_lora_base_id != lcm_lora.base_model_id
+            or self.previous_lcm_lora_id != lcm_lora.lcm_lora_id
+            or self.previous_use_lcm_lora != use_lora
+        ):
+            if self.use_openvino and is_openvino_device():
                 if self.pipeline:
                     del self.pipeline
+                    self.pipeline = None
+
-                self.pipeline = OVLatentConsistencyModelPipeline.from_pretrained(
+                self.pipeline = OVStableDiffusionPipeline.from_pretrained(
                     model_id,
-                    scheduler=scheduler,
-                    compile=False,
                     local_files_only=use_local_model,
+                    ov_config={"CACHE_DIR": ""},
+                    device=DEVICE.upper(),
                 )
+
+                if use_tiny_auto_encoder:
+                    print("Using Tiny Auto Encoder (OpenVINO)")
+                    taesd_dir = snapshot_download(
+                        repo_id=self.get_tiny_decoder_vae_model(),
+                        local_files_only=use_local_model,
+                    )
+                    self.pipeline.vae_decoder = CustomOVModelVaeDecoder(
+                        model=OVBaseModel.load_model(
+                            f"{taesd_dir}/vae_decoder/openvino_model.xml"
+                        ),
+                        parent_model=self.pipeline,
+                        model_dir=taesd_dir,
+                    )
+
             else:
                 if self.pipeline:
                     del self.pipeline
+                    self.pipeline = None
+
-                self.pipeline = DiffusionPipeline.from_pretrained(
-                    model_id,
-                    custom_pipeline=self._get_lcm_diffusion_pipeline_path(),
-                    custom_revision="main",
-                    local_files_only=use_local_model,
-                )
-                self.pipeline.to(
-                    torch_device=self.device,
-                    torch_dtype=torch.float32,
-                )
+                if use_lora:
+                    print("Init LCM-LoRA pipeline")
+                    self.pipeline = self._get_lcm_lora_pipeline(
+                        lcm_lora.base_model_id,
+                        lcm_lora.lcm_lora_id,
+                        use_local_model,
+                    )
+                else:
+                    print("Init LCM Model pipeline")
+                    self.pipeline = self._get_lcm_model_pipeline(
+                        model_id,
+                        use_local_model,
+                    )
+
+                if use_tiny_auto_encoder:
+                    vae_model = self.get_tiny_decoder_vae_model()
+                    print(f"Using Tiny Auto Encoder {vae_model}")
+                    self.pipeline.vae = AutoencoderTiny.from_pretrained(
+                        vae_model,
+                        torch_dtype=torch.float32,
+                        local_files_only=use_local_model,
+                    )
+
+                self._pipeline_to_device()

             self.previous_model_id = model_id
+            self.previous_use_tae_sd = use_tiny_auto_encoder
+            self.previous_lcm_lora_base_id = lcm_lora.base_model_id
+            self.previous_lcm_lora_id = lcm_lora.lcm_lora_id
+            self.previous_use_lcm_lora = use_lora
+            print(f"Model :{model_id}")
+            print(f"Pipeline : {self.pipeline}")
+            self.pipeline.scheduler = LCMScheduler.from_config(
+                self.pipeline.scheduler.config,
+                beta_start=0.001,
+                beta_end=0.01,
+            )
+            if use_lora:
+                self._add_freeu()

     def generate(
         self,
         lcm_diffusion_setting: LCMDiffusionSetting,
         reshape: bool = False,
     ) -> Any:
+        guidance_scale = lcm_diffusion_setting.guidance_scale
         if lcm_diffusion_setting.use_seed:
             cur_seed = lcm_diffusion_setting.seed
             if self.use_openvino:
@@ -87,12 +280,12 @@ class LCMTextToImage:
             else:
                 torch.manual_seed(cur_seed)

+        if lcm_diffusion_setting.use_openvino and is_openvino_device():
             print("Using OpenVINO")
             if reshape:
                 print("Reshape and compile")
                 self.pipeline.reshape(
+                    batch_size=-1,
                     height=lcm_diffusion_setting.image_height,
                     width=lcm_diffusion_setting.image_width,
                     num_images_per_prompt=lcm_diffusion_setting.number_of_images,
@@ -102,14 +295,33 @@ class LCMTextToImage:
         if not lcm_diffusion_setting.use_safety_checker:
             self.pipeline.safety_checker = None

+        if (
+            not lcm_diffusion_setting.use_lcm_lora
+            and not lcm_diffusion_setting.use_openvino
+            and lcm_diffusion_setting.guidance_scale != 1.0
+        ):
+            print("Not using LCM-LoRA so setting guidance_scale 1.0")
+            guidance_scale = 1.0
+
+        if lcm_diffusion_setting.use_openvino:
+            result_images = self.pipeline(
+                prompt=lcm_diffusion_setting.prompt,
+                negative_prompt=lcm_diffusion_setting.negative_prompt,
+                num_inference_steps=lcm_diffusion_setting.inference_steps,
+                guidance_scale=guidance_scale,
+                width=lcm_diffusion_setting.image_width,
+                height=lcm_diffusion_setting.image_height,
+                num_images_per_prompt=lcm_diffusion_setting.number_of_images,
+            ).images
+        else:
+            result_images = self.pipeline(
+                prompt=lcm_diffusion_setting.prompt,
+                negative_prompt=lcm_diffusion_setting.negative_prompt,
+                num_inference_steps=lcm_diffusion_setting.inference_steps,
+                guidance_scale=guidance_scale,
+                width=lcm_diffusion_setting.image_width,
+                height=lcm_diffusion_setting.image_height,
+                num_images_per_prompt=lcm_diffusion_setting.number_of_images,
+            ).images

         return result_images
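A minimal driver sketch for the reworked class above; the leading model_id parameter of init() is referenced in its body but its exact place in the signature is not visible in this diff, so the keyword usage below is an assumption:

# Hypothetical usage sketch of the reworked LCMTextToImage (not part of this commit).
from backend.lcm_text_to_image import LCMTextToImage
from backend.models.lcmdiffusion_setting import LCMDiffusionSetting, LCMLora

setting = LCMDiffusionSetting(
    prompt="a cup of coffee on a wooden table",
    use_lcm_lora=True,
    lcm_lora=LCMLora(
        base_model_id="Lykon/dreamshaper-8",
        lcm_lora_id="latent-consistency/lcm-lora-sdv1-5",
    ),
)

lcm = LCMTextToImage()
lcm.init(
    model_id=setting.lcm_model_id,  # assumed keyword; referenced inside init()
    use_openvino=setting.use_openvino,
    device="cpu",
    use_tiny_auto_encoder=setting.use_tiny_auto_encoder,
    use_lora=setting.use_lcm_lora,
    lcm_lora=setting.lcm_lora,
)
images = lcm.generate(lcm_diffusion_setting=setting)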
backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_ov_pipeline.cpython-311.pyc
CHANGED
Binary files a/backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_ov_pipeline.cpython-311.pyc and b/backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_ov_pipeline.cpython-311.pyc differ
backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_scheduler.cpython-311.pyc
CHANGED
Binary files a/backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_scheduler.cpython-311.pyc and b/backend/lcmdiffusion/pipelines/openvino/__pycache__/lcm_scheduler.cpython-311.pyc differ
backend/lcmdiffusion/pipelines/openvino/lcm_ov_pipeline.py
CHANGED
@@ -11,7 +11,14 @@ import openvino
 import torch

 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
+from optimum.intel.openvino.modeling_diffusion import (
+    OVStableDiffusionPipeline,
+    OVModelUnet,
+    OVModelVaeDecoder,
+    OVModelTextEncoder,
+    OVModelVaeEncoder,
+    VaeImageProcessor,
+)
 from optimum.utils import (
     DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER,
     DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
@@ -22,8 +29,10 @@ from optimum.utils import (
 from diffusers import logging
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 class LCMOVModelUnet(OVModelUnet):
     def __call__(
         self,
@@ -52,8 +61,8 @@ class LCMOVModelUnet(OVModelUnet):
         outputs = self.request(inputs, shared_memory=True)
         return list(outputs.values())


+class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
     def __init__(
         self,
         vae_decoder: openvino.runtime.Model,
@@ -78,20 +87,32 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         self.is_dynamic = dynamic_shapes
         self.ov_config = ov_config if ov_config is not None else {}
         self._model_save_dir = (
             Path(model_save_dir.name)
+            if isinstance(model_save_dir, TemporaryDirectory)
+            else model_save_dir
         )
         self.vae_decoder = OVModelVaeDecoder(vae_decoder, self)
         self.unet = LCMOVModelUnet(unet, self)
+        self.text_encoder = (
+            OVModelTextEncoder(text_encoder, self) if text_encoder is not None else None
+        )
         self.text_encoder_2 = (
+            OVModelTextEncoder(
+                text_encoder_2,
+                self,
+                model_name=DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER,
+            )
             if text_encoder_2 is not None
             else None
         )
+        self.vae_encoder = (
+            OVModelVaeEncoder(vae_encoder, self) if vae_encoder is not None else None
+        )

         if "block_out_channels" in self.vae_decoder.config:
+            self.vae_scale_factor = 2 ** (
+                len(self.vae_decoder.config["block_out_channels"]) - 1
+            )
         else:
             self.vae_scale_factor = 8
@@ -119,7 +140,9 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         }
         for name in sub_models.keys():
             self._internal_dict[name] = (
                 ("optimum", sub_models[name].__class__.__name__)
+                if sub_models[name] is not None
+                else (None, None)
             )

         self._internal_dict.pop("vae", None)
@@ -132,7 +155,7 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         width: int = -1,
         num_images_per_prompt: int = -1,
         tokenizer_max_length: int = -1,
+    ):
         if batch_size == -1 or num_images_per_prompt == -1:
             batch_size = -1
         else:
@@ -152,14 +175,17 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
             if in_channels.is_dynamic:
                 logger.warning(
                     "Could not identify `in_channels` from the unet configuration, to statically reshape the unet please provide a configuration."
+                )
                 self.is_dynamic = True
+
             shapes[inputs] = [batch_size, in_channels, height, width]
         elif inputs.get_any_name() == "timestep_cond":
             shapes[inputs] = [batch_size, inputs.get_partial_shape()[1]]
         elif inputs.get_any_name() == "text_embeds":
+            shapes[inputs] = [
+                batch_size,
+                self.text_encoder_2.config["projection_dim"],
+            ]
         elif inputs.get_any_name() == "time_ids":
             shapes[inputs] = [batch_size, inputs.get_partial_shape()[1]]
         else:
@@ -180,10 +206,10 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
             embedding vectors with shape `(len(timesteps), embedding_dim)`
         """
         assert len(w.shape) == 1
-        w = w * 1000.
+        w = w * 1000.0

         half_dim = embedding_dim // 2
-        emb = np.log(np.array(10000.)) / (half_dim - 1)
+        emb = np.log(np.array(10000.0)) / (half_dim - 1)
         emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb)
         emb = w.astype(dtype)[:, None] * emb[None, :]
         emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1)
@@ -276,7 +302,9 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        height = (
+            height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor
+        )
         width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor

         # check inputs. Raise error if not correct
@@ -296,9 +324,11 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
             generator = np.random

         # Create torch.Generator instance with same state as np.random.RandomState
+        torch_generator = torch.Generator().manual_seed(
+            int(generator.get_state()[1][0])
+        )

-        #do_classifier_free_guidance = guidance_scale > 1.0
+        # do_classifier_free_guidance = guidance_scale > 1.0

         # NOTE: when a LCM is distilled from an LDM via latent consistency distillation (Algorithm 1) with guided
         # distillation, the forward pass of the LCM learns to approximate sampling from the LDM using CFG with the
@@ -313,7 +343,11 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         )

         # set timesteps
+        self.scheduler.set_timesteps(
+            num_inference_steps,
+            "cpu",
+            original_inference_steps=original_inference_steps,
+        )
         timesteps = self.scheduler.timesteps

         latents = self.prepare_latents(
@@ -328,7 +362,9 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):

         # Get Guidance Scale Embedding
         w = np.tile(guidance_scale - 1, batch_size * num_images_per_prompt)
+        w_embedding = self.get_guidance_scale_embedding(
+            w, embedding_dim=self.unet.config.get("time_cond_proj_dim", 256)
+        )

         # Adapted from diffusers to extend it for other runtimes than ORT
         timestep_dtype = self.unet.input_dtype.get("timestep", np.float32)
@@ -337,32 +373,46 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
         # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
         # and should be between [0, 1]
+        accepts_eta = "eta" in set(
+            inspect.signature(self.scheduler.step).parameters.keys()
+        )
         extra_step_kwargs = {}
         if accepts_eta:
             extra_step_kwargs["eta"] = eta

+        accepts_generator = "generator" in set(
+            inspect.signature(self.scheduler.step).parameters.keys()
+        )
         if accepts_generator:
             extra_step_kwargs["generator"] = torch_generator

         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         for i, t in enumerate(self.progress_bar(timesteps)):
             # predict the noise residual
             timestep = np.array([t], dtype=timestep_dtype)
+
+            noise_pred = self.unet(
+                sample=latents,
+                timestep=timestep,
+                timestep_cond=w_embedding,
+                encoder_hidden_states=prompt_embeds,
+            )[0]

             # compute the previous noisy sample x_t -> x_t-1
             latents, denoised = self.scheduler.step(
                 torch.from_numpy(noise_pred),
+                t,
+                torch.from_numpy(latents),
+                **extra_step_kwargs,
+                return_dict=False,
             )

             latents, denoised = latents.numpy(), denoised.numpy()

             # call the callback, if provided
+            if i == len(timesteps) - 1 or (
+                (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
+            ):
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
@@ -373,7 +423,10 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         denoised /= self.vae_decoder.config.get("scaling_factor", 0.18215)
         # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1
         image = np.concatenate(
+            [
+                self.vae_decoder(latent_sample=denoised[i : i + 1])[0]
+                for i in range(latents.shape[0])
+            ]
         )
         image, has_nsfw_concept = self.run_safety_checker(image)
@@ -382,9 +435,13 @@ class OVLatentConsistencyModelPipeline(OVStableDiffusionPipeline):
         else:
             do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]

+        image = self.image_processor.postprocess(
+            image, output_type=output_type, do_denormalize=do_denormalize
+        )

         if not return_dict:
             return (image, has_nsfw_concept)

+        return StableDiffusionPipelineOutput(
+            images=image, nsfw_content_detected=has_nsfw_concept
+        )
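For reference, the get_guidance_scale_embedding() call wired in above builds a sinusoidal embedding of the scaled guidance weight. A standalone sketch of the same computation, with hypothetical inputs (guidance_scale 8.0, one image, embedding_dim mirroring the 256 time_cond_proj_dim fallback used above):

# Standalone sketch of the sinusoidal guidance-scale embedding used above.
# The inputs here are hypothetical; the math mirrors the pipeline code line for line.
import numpy as np

w = np.tile(8.0 - 1, 1 * 1)   # (guidance_scale - 1) tiled per generated image
embedding_dim = 256           # unet config "time_cond_proj_dim" fallback above
dtype = np.float32

w = w * 1000.0
half_dim = embedding_dim // 2
emb = np.log(np.array(10000.0)) / (half_dim - 1)
emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb)
emb = w.astype(dtype)[:, None] * emb[None, :]
emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1)
print(emb.shape)              # (1, 256), fed to the unet as timestep_cond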
backend/lcmdiffusion/pipelines/openvino/lcm_scheduler.py
CHANGED
|
@@ -213,17 +213,27 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 213 |
if trained_betas is not None:
|
| 214 |
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
| 215 |
elif beta_schedule == "linear":
|
| 216 |
-
self.betas = torch.linspace(
|
|
|
|
|
|
|
| 217 |
elif beta_schedule == "scaled_linear":
|
| 218 |
# this schedule is very specific to the latent diffusion model.
|
| 219 |
self.betas = (
|
| 220 |
-
torch.linspace(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
)
|
| 222 |
elif beta_schedule == "squaredcos_cap_v2":
|
| 223 |
# Glide cosine schedule
|
| 224 |
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
| 225 |
else:
|
| 226 |
-
raise NotImplementedError(
|
|
|
|
|
|
|
| 227 |
|
| 228 |
# Rescale for zero SNR
|
| 229 |
if rescale_betas_zero_snr:
|
|
@@ -236,14 +246,18 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 236 |
# For the final step, there is no previous alphas_cumprod because we are already at 0
|
| 237 |
# `set_alpha_to_one` decides whether we set this parameter simply to one or
|
| 238 |
# whether we use the final alpha of the "non-previous" one.
|
| 239 |
-
self.final_alpha_cumprod =
|
|
|
|
|
|
|
| 240 |
|
| 241 |
# standard deviation of the initial noise distribution
|
| 242 |
self.init_noise_sigma = 1.0
|
| 243 |
|
| 244 |
# setable values
|
| 245 |
self.num_inference_steps = None
|
| 246 |
-
self.timesteps = torch.from_numpy(
|
|
|
|
|
|
|
| 247 |
|
| 248 |
self._step_index = None
|
| 249 |
|
|
@@ -269,7 +283,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 269 |
def step_index(self):
|
| 270 |
return self._step_index
|
| 271 |
|
| 272 |
-
def scale_model_input(
|
|
|
|
|
|
|
| 273 |
"""
|
| 274 |
Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
|
| 275 |
current timestep.
|
|
@@ -300,7 +316,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 300 |
batch_size, channels, *remaining_dims = sample.shape
|
| 301 |
|
| 302 |
if dtype not in (torch.float32, torch.float64):
|
| 303 |
-
sample =
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# Flatten sample for doing quantile calculation along each image
|
| 306 |
sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
|
|
@@ -312,7 +330,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 312 |
s, min=1, max=self.config.sample_max_value
|
| 313 |
) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
|
| 314 |
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
|
| 315 |
-
sample =
|
|
|
|
|
|
|
| 316 |
|
| 317 |
sample = sample.reshape(batch_size, channels, *remaining_dims)
|
| 318 |
sample = sample.to(dtype)
|
|
@@ -349,7 +369,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 349 |
|
| 350 |
self.num_inference_steps = num_inference_steps
|
| 351 |
original_steps = (
|
| 352 |
-
original_inference_steps
|
|
|
|
|
|
|
| 353 |
)
|
| 354 |
|
| 355 |
if original_steps > self.config.num_train_timesteps:
|
|
@@ -375,7 +397,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 375 |
# LCM Inference Steps Schedule
|
| 376 |
timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps]
|
| 377 |
|
| 378 |
-
self.timesteps = torch.from_numpy(timesteps.copy()).to(
|
|
|
|
|
|
|
| 379 |
|
| 380 |
self._step_index = None
|
| 381 |
|
|
@@ -432,7 +456,11 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 432 |
|
| 433 |
# 2. compute alphas, betas
|
| 434 |
alpha_prod_t = self.alphas_cumprod[timestep]
|
| 435 |
-
alpha_prod_t_prev =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
|
| 437 |
beta_prod_t = 1 - alpha_prod_t
|
| 438 |
beta_prod_t_prev = 1 - alpha_prod_t_prev
|
|
@@ -442,11 +470,15 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 442 |
|
| 443 |
# 4. Compute the predicted original sample x_0 based on the model parameterization
|
| 444 |
if self.config.prediction_type == "epsilon": # noise-prediction
|
| 445 |
-
predicted_original_sample = (
|
|
|
|
|
|
|
| 446 |
elif self.config.prediction_type == "sample": # x-prediction
|
| 447 |
predicted_original_sample = model_output
|
| 448 |
elif self.config.prediction_type == "v_prediction": # v-prediction
|
| 449 |
-
predicted_original_sample =
|
|
|
|
|
|
|
| 450 |
else:
|
| 451 |
raise ValueError(
|
| 452 |
f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
|
|
@@ -455,7 +487,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 455 |
|
| 456 |
# 5. Clip or threshold "predicted x_0"
|
| 457 |
if self.config.thresholding:
|
| 458 |
-
predicted_original_sample = self._threshold_sample(
|
|
|
|
|
|
|
| 459 |
elif self.config.clip_sample:
|
| 460 |
predicted_original_sample = predicted_original_sample.clamp(
|
| 461 |
-self.config.clip_sample_range, self.config.clip_sample_range
|
|
@@ -467,8 +501,12 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 467 |
# 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
|
| 468 |
# Noise is not used for one-step sampling.
|
| 469 |
if len(self.timesteps) > 1:
|
| 470 |
-
noise = randn_tensor(
|
| 471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
else:
|
| 473 |
prev_sample = denoised
|
| 474 |
|
|
@@ -488,7 +526,9 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 488 |
timesteps: torch.IntTensor,
|
| 489 |
) -> torch.FloatTensor:
|
| 490 |
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
| 491 |
-
alphas_cumprod = self.alphas_cumprod.to(
|
|
|
|
|
|
|
| 492 |
timesteps = timesteps.to(original_samples.device)
|
| 493 |
|
| 494 |
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
|
@@ -501,15 +541,22 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
| 501 |
while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
|
| 502 |
sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
|
| 503 |
|
| 504 |
-
noisy_samples =
|
|
|
|
|
|
|
| 505 |
return noisy_samples
|
| 506 |
|
| 507 |
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
|
| 508 |
def get_velocity(
|
| 509 |
-
self,
|
|
|
|
|
|
|
|
|
|
| 510 |
) -> torch.FloatTensor:
|
| 511 |
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
| 512 |
-
alphas_cumprod = self.alphas_cumprod.to(
|
|
|
|
|
|
|
| 513 |
timesteps = timesteps.to(sample.device)
|
| 514 |
|
| 515 |
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
|
|
|
| 213 |
if trained_betas is not None:
|
| 214 |
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
| 215 |
elif beta_schedule == "linear":
|
| 216 |
+
self.betas = torch.linspace(
|
| 217 |
+
beta_start, beta_end, num_train_timesteps, dtype=torch.float32
|
| 218 |
+
)
|
| 219 |
elif beta_schedule == "scaled_linear":
|
| 220 |
# this schedule is very specific to the latent diffusion model.
|
| 221 |
self.betas = (
|
| 222 |
+
torch.linspace(
|
| 223 |
+
beta_start**0.5,
|
| 224 |
+
beta_end**0.5,
|
| 225 |
+
num_train_timesteps,
|
| 226 |
+
dtype=torch.float32,
|
| 227 |
+
)
|
| 228 |
+
** 2
|
| 229 |
)
|
| 230 |
elif beta_schedule == "squaredcos_cap_v2":
|
| 231 |
# Glide cosine schedule
|
| 232 |
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
| 233 |
else:
|
| 234 |
+
raise NotImplementedError(
|
| 235 |
+
f"{beta_schedule} does is not implemented for {self.__class__}"
|
| 236 |
+
)
|
| 237 |
|
| 238 |
# Rescale for zero SNR
|
| 239 |
if rescale_betas_zero_snr:
|
|
|
|
| 246 |
# For the final step, there is no previous alphas_cumprod because we are already at 0
|
| 247 |
# `set_alpha_to_one` decides whether we set this parameter simply to one or
|
| 248 |
# whether we use the final alpha of the "non-previous" one.
|
| 249 |
+
self.final_alpha_cumprod = (
|
| 250 |
+
torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
|
| 251 |
+
)
|
| 252 |
|
| 253 |
# standard deviation of the initial noise distribution
|
| 254 |
self.init_noise_sigma = 1.0
|
| 255 |
|
| 256 |
# setable values
|
| 257 |
self.num_inference_steps = None
|
| 258 |
+
self.timesteps = torch.from_numpy(
|
| 259 |
+
np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)
|
| 260 |
+
)
|
| 261 |
|
| 262 |
self._step_index = None
|
| 263 |
|
|
|
|
| 283 |
def step_index(self):
|
| 284 |
return self._step_index
|
| 285 |
|
| 286 |
+
def scale_model_input(
|
+        self, sample: torch.FloatTensor, timestep: Optional[int] = None
+    ) -> torch.FloatTensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.

         batch_size, channels, *remaining_dims = sample.shape

         if dtype not in (torch.float32, torch.float64):
+            sample = (
+                sample.float()
+            )  # upcast for quantile calculation, and clamp not implemented for cpu half

         # Flatten sample for doing quantile calculation along each image
         sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))

             s, min=1, max=self.config.sample_max_value
         )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
         s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
+        sample = (
+            torch.clamp(sample, -s, s) / s
+        )  # "we threshold xt0 to the range [-s, s] and then divide by s"

         sample = sample.reshape(batch_size, channels, *remaining_dims)
         sample = sample.to(dtype)

         self.num_inference_steps = num_inference_steps
         original_steps = (
+            original_inference_steps
+            if original_inference_steps is not None
+            else self.original_inference_steps
         )

         if original_steps > self.config.num_train_timesteps:

         # LCM Inference Steps Schedule
         timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps]

+        self.timesteps = torch.from_numpy(timesteps.copy()).to(
+            device=device, dtype=torch.long
+        )

         self._step_index = None

         # 2. compute alphas, betas
         alpha_prod_t = self.alphas_cumprod[timestep]
+        alpha_prod_t_prev = (
+            self.alphas_cumprod[prev_timestep]
+            if prev_timestep >= 0
+            else self.final_alpha_cumprod
+        )

         beta_prod_t = 1 - alpha_prod_t
         beta_prod_t_prev = 1 - alpha_prod_t_prev

         # 4. Compute the predicted original sample x_0 based on the model parameterization
         if self.config.prediction_type == "epsilon":  # noise-prediction
+            predicted_original_sample = (
+                sample - beta_prod_t.sqrt() * model_output
+            ) / alpha_prod_t.sqrt()
         elif self.config.prediction_type == "sample":  # x-prediction
             predicted_original_sample = model_output
         elif self.config.prediction_type == "v_prediction":  # v-prediction
+            predicted_original_sample = (
+                alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output
+            )
         else:
             raise ValueError(
                 f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"

         # 5. Clip or threshold "predicted x_0"
         if self.config.thresholding:
+            predicted_original_sample = self._threshold_sample(
+                predicted_original_sample
+            )
         elif self.config.clip_sample:
             predicted_original_sample = predicted_original_sample.clamp(
                 -self.config.clip_sample_range, self.config.clip_sample_range

         # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
         # Noise is not used for one-step sampling.
         if len(self.timesteps) > 1:
+            noise = randn_tensor(
+                model_output.shape, generator=generator, device=model_output.device
+            )
+            prev_sample = (
+                alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
+            )
         else:
             prev_sample = denoised

         timesteps: torch.IntTensor,
     ) -> torch.FloatTensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+        alphas_cumprod = self.alphas_cumprod.to(
+            device=original_samples.device, dtype=original_samples.dtype
+        )
         timesteps = timesteps.to(original_samples.device)

         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5

         while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
             sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)

+        noisy_samples = (
+            sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+        )
         return noisy_samples

     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
     def get_velocity(
+        self,
+        sample: torch.FloatTensor,
+        noise: torch.FloatTensor,
+        timesteps: torch.IntTensor,
     ) -> torch.FloatTensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as sample
+        alphas_cumprod = self.alphas_cumprod.to(
+            device=sample.device, dtype=sample.dtype
+        )
         timesteps = timesteps.to(sample.device)

         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
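The hunk above builds the LCM inference schedule by striding backwards through the LCM "origin" timesteps. The derivation of lcm_origin_timesteps and skipping_step is not visible in this view, so the sketch below reproduces the upstream diffusers LCMScheduler approach with assumed default values (1000 training timesteps, 50 original inference steps):

import numpy as np

# Assumed defaults, not taken from this diff.
num_train_timesteps = 1000
original_steps = 50  # original_inference_steps
num_inference_steps = 4

# k is the spacing of the LCM origin timesteps inside the DDPM schedule.
k = num_train_timesteps // original_steps
lcm_origin_timesteps = np.asarray(list(range(1, original_steps + 1))) * k - 1

# Stride backwards through the origin timesteps to get the inference schedule.
skipping_step = len(lcm_origin_timesteps) // num_inference_steps
timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps]
print(timesteps)  # [999 759 519 279] with these numbers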
backend/models/__pycache__/lcmdiffusion_setting.cpython-311.pyc
CHANGED
Binary files a/backend/models/__pycache__/lcmdiffusion_setting.cpython-311.pyc and b/backend/models/__pycache__/lcmdiffusion_setting.cpython-311.pyc differ

backend/models/lcmdiffusion_setting.py
CHANGED
@@ -1,19 +1,29 @@
 from typing import Optional
 
 from pydantic import BaseModel
-from constants import LCM_DEFAULT_MODEL
+from constants import LCM_DEFAULT_MODEL, LCM_DEFAULT_MODEL_OPENVINO
+
+
+class LCMLora(BaseModel):
+    base_model_id: str = ""
+    lcm_lora_id: str = ""
 
 
 class LCMDiffusionSetting(BaseModel):
     lcm_model_id: str = LCM_DEFAULT_MODEL
+    openvino_lcm_model_id: str = LCM_DEFAULT_MODEL_OPENVINO
+    use_offline_model: bool = False
+    use_lcm_lora: bool = False
+    lcm_lora: Optional[LCMLora] = LCMLora()
+    use_tiny_auto_encoder: bool = False
+    use_openvino: bool = False
     prompt: str = ""
+    negative_prompt: str = ""
     image_height: Optional[int] = 512
     image_width: Optional[int] = 512
     inference_steps: Optional[int] = 4
-    guidance_scale: Optional[float] =
+    guidance_scale: Optional[float] = 1
     number_of_images: Optional[int] = 1
     seed: Optional[int] = -1
-    use_openvino: bool = False
     use_seed: bool = False
-
-    use_safety_checker: bool = True
+    use_safety_checker: bool = False
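With the new fields above, an LCM-LoRA run is configured through the nested LCMLora model. A minimal sketch of constructing such a setting (the base and LoRA repo IDs here are illustrative, not taken from this commit):

from backend.models.lcmdiffusion_setting import LCMDiffusionSetting, LCMLora

# Illustrative repo IDs; any SD 1.5 base plus a matching LCM-LoRA would do.
setting = LCMDiffusionSetting(
    use_lcm_lora=True,
    lcm_lora=LCMLora(
        base_model_id="runwayml/stable-diffusion-v1-5",
        lcm_lora_id="latent-consistency/lcm-lora-sdv1-5",
    ),
    prompt="A fantasy landscape",
    negative_prompt="blurry, low quality",
    guidance_scale=1.0,
    inference_steps=4,
)
print(setting.model_dump())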
constants.py
CHANGED
@@ -1,10 +1,16 @@
 from os import environ
 
-APP_VERSION = "v1.0.0 beta
+APP_VERSION = "v1.0.0 beta 16"
 LCM_DEFAULT_MODEL = "SimianLuo/LCM_Dreamshaper_v7"
-LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/LCM-dreamshaper-v7-openvino
+LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/LCM-dreamshaper-v7-openvino"
 APP_NAME = "FastSD CPU"
 APP_SETTINGS_FILE = "settings.yaml"
 RESULTS_DIRECTORY = "results"
 CONFIG_DIRECTORY = "configs"
 DEVICE = environ.get("DEVICE", "cpu")
+SD_MODELS_FILE = "stable-diffusion-models.txt"
+LCM_LORA_MODELS_FILE = "lcm-lora-models.txt"
+OPENVINO_LCM_MODELS_FILE = "openvino-lcm-models.txt"
+TAESD_MODEL = "madebyollin/taesd"
+TAESDXL_MODEL = "madebyollin/taesdxl"
+TAESD_MODEL_OPENVINO = "deinferno/taesd-openvino"
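The new TAESD_* constants point at tiny autoencoder checkpoints used when "Use Tiny Auto Encoder" is enabled. A rough sketch of how such a checkpoint is typically loaded with diffusers (hypothetical usage, not code from this commit):

from diffusers import AutoencoderTiny
from constants import TAESD_MODEL

# Hypothetical: load the tiny decoder that TAESD_MODEL ("madebyollin/taesd")
# points to; it would then replace a pipeline's VAE for a faster, lower-quality decode.
tiny_vae = AutoencoderTiny.from_pretrained(TAESD_MODEL)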
context.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Any
 from app_settings import Settings
 from models.interface_types import InterfaceType
 from backend.lcm_text_to_image import LCMTextToImage
-from time import
+from time import perf_counter
 from backend.image_saver import ImageSaver
 from pprint import pprint
 
@@ -22,23 +22,28 @@ class Context:
         reshape: bool = False,
         device: str = "cpu",
     ) -> Any:
-        tick =
+        tick = perf_counter()
         pprint(settings.lcm_diffusion_setting.model_dump())
+        if not settings.lcm_diffusion_setting.lcm_lora:
+            return None
         self.lcm_text_to_image.init(
             settings.lcm_diffusion_setting.lcm_model_id,
             settings.lcm_diffusion_setting.use_openvino,
             device,
             settings.lcm_diffusion_setting.use_offline_model,
+            settings.lcm_diffusion_setting.use_tiny_auto_encoder,
+            settings.lcm_diffusion_setting.use_lcm_lora,
+            settings.lcm_diffusion_setting.lcm_lora,
         )
         images = self.lcm_text_to_image.generate(
             settings.lcm_diffusion_setting,
             reshape,
         )
-        elapsed =
-
-
-
-
-
+        elapsed = perf_counter() - tick
+        ImageSaver.save_images(
+            settings.results_path,
+            images=images,
+            lcm_diffusion_setting=settings.lcm_diffusion_setting,
+        )
         print(f"Elapsed time : {elapsed:.2f} seconds")
         return images
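The context change swaps the bare time import for perf_counter and routes every batch through ImageSaver. The timing pattern used here, shown in isolation as a small sketch with a stand-in workload:

from time import perf_counter

def timed(fn, *args, **kwargs):
    # Same pattern as Context.generate_text_to_image: measure wall-clock time
    # around the generation call and report it.
    tick = perf_counter()
    result = fn(*args, **kwargs)
    elapsed = perf_counter() - tick
    print(f"Elapsed time : {elapsed:.2f} seconds")
    return result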
frontend/__pycache__/utils.cpython-311.pyc
CHANGED
Binary files a/frontend/__pycache__/utils.cpython-311.pyc and b/frontend/__pycache__/utils.cpython-311.pyc differ

frontend/gui/__pycache__/app_window.cpython-311.pyc
CHANGED
Binary files a/frontend/gui/__pycache__/app_window.cpython-311.pyc and b/frontend/gui/__pycache__/app_window.cpython-311.pyc differ

frontend/gui/__pycache__/image_generator_worker.cpython-311.pyc
CHANGED
Binary files a/frontend/gui/__pycache__/image_generator_worker.cpython-311.pyc and b/frontend/gui/__pycache__/image_generator_worker.cpython-311.pyc differ

frontend/gui/__pycache__/ui.cpython-311.pyc
CHANGED
Binary files a/frontend/gui/__pycache__/ui.cpython-311.pyc and b/frontend/gui/__pycache__/ui.cpython-311.pyc differ

frontend/gui/app_window.py
CHANGED
|
@@ -16,7 +16,7 @@ from PyQt5.QtWidgets import (
|
|
| 16 |
QToolButton,
|
| 17 |
QFileDialog,
|
| 18 |
)
|
| 19 |
-
|
| 20 |
from PyQt5.QtGui import QPixmap, QDesktopServices
|
| 21 |
from PyQt5.QtCore import QSize, QThreadPool, Qt, QUrl
|
| 22 |
|
|
@@ -34,18 +34,23 @@ from frontend.utils import is_reshape_required
|
|
| 34 |
from context import Context
|
| 35 |
from models.interface_types import InterfaceType
|
| 36 |
from constants import DEVICE
|
| 37 |
-
from frontend.utils import enable_openvino_controls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
class MainWindow(QMainWindow):
|
| 41 |
def __init__(self, config: AppSettings):
|
| 42 |
super().__init__()
|
|
|
|
| 43 |
self.setWindowTitle(APP_NAME)
|
| 44 |
-
self.setFixedSize(QSize(600,
|
| 45 |
self.init_ui()
|
| 46 |
self.pipeline = None
|
| 47 |
self.threadpool = QThreadPool()
|
| 48 |
-
self.config = config
|
| 49 |
self.device = "cpu"
|
| 50 |
self.previous_width = 0
|
| 51 |
self.previous_height = 0
|
|
@@ -89,6 +94,37 @@ class MainWindow(QMainWindow):
|
|
| 89 |
self.num_images.setValue(
|
| 90 |
self.config.settings.lcm_diffusion_setting.number_of_images
|
| 91 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
def init_ui(self):
|
| 94 |
self.create_main_tab()
|
|
@@ -100,20 +136,26 @@ class MainWindow(QMainWindow):
|
|
| 100 |
self.img = QLabel("<<Image>>")
|
| 101 |
self.img.setAlignment(Qt.AlignCenter)
|
| 102 |
self.img.setFixedSize(QSize(512, 512))
|
|
|
|
| 103 |
|
| 104 |
self.prompt = QTextEdit()
|
| 105 |
self.prompt.setPlaceholderText("A fantasy landscape")
|
| 106 |
self.prompt.setAcceptRichText(False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
self.generate = QPushButton("Generate")
|
| 108 |
self.generate.clicked.connect(self.text_to_image)
|
| 109 |
-
self.prompt.setFixedHeight(
|
|
|
|
| 110 |
self.browse_results = QPushButton("...")
|
| 111 |
self.browse_results.setFixedWidth(30)
|
| 112 |
self.browse_results.clicked.connect(self.on_open_results_folder)
|
| 113 |
self.browse_results.setToolTip("Open output folder")
|
| 114 |
|
| 115 |
hlayout = QHBoxLayout()
|
| 116 |
-
hlayout.addWidget(self.
|
| 117 |
hlayout.addWidget(self.generate)
|
| 118 |
hlayout.addWidget(self.browse_results)
|
| 119 |
|
|
@@ -130,6 +172,9 @@ class MainWindow(QMainWindow):
|
|
| 130 |
|
| 131 |
vlayout = QVBoxLayout()
|
| 132 |
vlayout.addLayout(hlayout_nav)
|
|
|
|
|
|
|
|
|
|
| 133 |
vlayout.addLayout(hlayout)
|
| 134 |
|
| 135 |
self.tab_widget = QTabWidget(self)
|
|
@@ -146,11 +191,26 @@ class MainWindow(QMainWindow):
|
|
| 146 |
self.use_seed = False
|
| 147 |
|
| 148 |
def create_settings_tab(self):
|
| 149 |
-
model_hlayout = QHBoxLayout()
|
| 150 |
self.lcm_model_label = QLabel("Latent Consistency Model:")
|
| 151 |
-
self.lcm_model = QLineEdit(LCM_DEFAULT_MODEL)
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
self.inference_steps_value = QLabel("Number of inference steps: 4")
|
| 156 |
self.inference_steps = QSlider(orientation=Qt.Orientation.Horizontal)
|
|
@@ -166,11 +226,11 @@ class MainWindow(QMainWindow):
|
|
| 166 |
self.num_images.setValue(1)
|
| 167 |
self.num_images.valueChanged.connect(self.update_num_images_label)
|
| 168 |
|
| 169 |
-
self.guidance_value = QLabel("Guidance scale:
|
| 170 |
self.guidance = QSlider(orientation=Qt.Orientation.Horizontal)
|
| 171 |
-
self.guidance.setMaximum(
|
| 172 |
self.guidance.setMinimum(10)
|
| 173 |
-
self.guidance.setValue(
|
| 174 |
self.guidance.valueChanged.connect(self.update_guidance_label)
|
| 175 |
|
| 176 |
self.width_value = QLabel("Width :")
|
|
@@ -178,6 +238,7 @@ class MainWindow(QMainWindow):
|
|
| 178 |
self.width.addItem("256")
|
| 179 |
self.width.addItem("512")
|
| 180 |
self.width.addItem("768")
|
|
|
|
| 181 |
self.width.setCurrentText("512")
|
| 182 |
self.width.currentIndexChanged.connect(self.on_width_changed)
|
| 183 |
|
|
@@ -186,6 +247,7 @@ class MainWindow(QMainWindow):
|
|
| 186 |
self.height.addItem("256")
|
| 187 |
self.height.addItem("512")
|
| 188 |
self.height.addItem("768")
|
|
|
|
| 189 |
self.height.setCurrentText("512")
|
| 190 |
self.height.currentIndexChanged.connect(self.on_height_changed)
|
| 191 |
|
|
@@ -201,14 +263,27 @@ class MainWindow(QMainWindow):
|
|
| 201 |
|
| 202 |
self.use_openvino_check = QCheckBox("Use OpenVINO")
|
| 203 |
self.use_openvino_check.setChecked(False)
|
|
|
|
| 204 |
self.use_local_model_folder = QCheckBox(
|
| 205 |
"Use locally cached model or downloaded model folder(offline)"
|
| 206 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
self.use_openvino_check.setEnabled(enable_openvino_controls())
|
| 208 |
self.use_local_model_folder.setChecked(False)
|
| 209 |
self.use_local_model_folder.stateChanged.connect(self.use_offline_model_changed)
|
| 210 |
self.use_openvino_check.stateChanged.connect(self.use_openvino_changed)
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
hlayout = QHBoxLayout()
|
| 213 |
hlayout.addWidget(self.seed_check)
|
| 214 |
hlayout.addWidget(self.seed_value)
|
|
@@ -228,8 +303,18 @@ class MainWindow(QMainWindow):
|
|
| 228 |
vlayout = QVBoxLayout()
|
| 229 |
vspacer = QSpacerItem(20, 20, QSizePolicy.Minimum, QSizePolicy.Expanding)
|
| 230 |
vlayout.addItem(hspacer)
|
| 231 |
-
vlayout.
|
|
|
|
| 232 |
vlayout.addWidget(self.use_local_model_folder)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
vlayout.addItem(slider_hspacer)
|
| 234 |
vlayout.addWidget(self.inference_steps_value)
|
| 235 |
vlayout.addWidget(self.inference_steps)
|
|
@@ -243,7 +328,7 @@ class MainWindow(QMainWindow):
|
|
| 243 |
vlayout.addWidget(self.guidance)
|
| 244 |
vlayout.addLayout(hlayout)
|
| 245 |
vlayout.addWidget(self.safety_checker)
|
| 246 |
-
|
| 247 |
vlayout.addWidget(self.results_path_label)
|
| 248 |
hlayout_path = QHBoxLayout()
|
| 249 |
hlayout_path.addWidget(self.results_path)
|
|
@@ -272,11 +357,27 @@ class MainWindow(QMainWindow):
|
|
| 272 |
vlayout.addWidget(self.label)
|
| 273 |
self.tab_about.setLayout(vlayout)
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
def on_show_next_image(self):
|
| 276 |
if self.image_index != len(self.gen_images) - 1 and len(self.gen_images) > 0:
|
| 277 |
self.previous_img_btn.setEnabled(True)
|
| 278 |
self.image_index += 1
|
| 279 |
-
self.
|
| 280 |
if self.image_index == len(self.gen_images) - 1:
|
| 281 |
self.next_img_btn.setEnabled(False)
|
| 282 |
|
|
@@ -287,7 +388,7 @@ class MainWindow(QMainWindow):
|
|
| 287 |
if self.image_index != 0:
|
| 288 |
self.next_img_btn.setEnabled(True)
|
| 289 |
self.image_index -= 1
|
| 290 |
-
self.
|
| 291 |
if self.image_index == 0:
|
| 292 |
self.previous_img_btn.setEnabled(False)
|
| 293 |
|
|
@@ -314,19 +415,62 @@ class MainWindow(QMainWindow):
|
|
| 314 |
height_txt = self.height.itemText(index)
|
| 315 |
self.config.settings.lcm_diffusion_setting.image_height = int(height_txt)
|
| 316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
def use_openvino_changed(self, state):
|
| 318 |
if state == 2:
|
| 319 |
self.lcm_model.setEnabled(False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
self.config.settings.lcm_diffusion_setting.use_openvino = True
|
| 321 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
self.config.settings.lcm_diffusion_setting.use_openvino = False
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
def use_offline_model_changed(self, state):
|
| 325 |
if state == 2:
|
| 326 |
self.config.settings.lcm_diffusion_setting.use_offline_model = True
|
| 327 |
else:
|
| 328 |
self.config.settings.lcm_diffusion_setting.use_offline_model = False
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
def use_safety_checker_changed(self, state):
|
| 331 |
if state == 2:
|
| 332 |
self.config.settings.lcm_diffusion_setting.use_safety_checker = True
|
|
@@ -362,11 +506,20 @@ class MainWindow(QMainWindow):
|
|
| 362 |
def generate_image(self):
|
| 363 |
self.config.settings.lcm_diffusion_setting.seed = self.get_seed_value()
|
| 364 |
self.config.settings.lcm_diffusion_setting.prompt = self.prompt.toPlainText()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
if self.config.settings.lcm_diffusion_setting.use_openvino:
|
| 367 |
-
model_id =
|
| 368 |
else:
|
| 369 |
-
model_id = self.lcm_model.
|
| 370 |
|
| 371 |
self.config.settings.lcm_diffusion_setting.lcm_model_id = model_id
|
| 372 |
|
|
@@ -403,7 +556,7 @@ class MainWindow(QMainWindow):
|
|
| 403 |
self.next_img_btn.setEnabled(False)
|
| 404 |
self.previous_img_btn.setEnabled(False)
|
| 405 |
|
| 406 |
-
self.
|
| 407 |
|
| 408 |
self.previous_width = self.config.settings.lcm_diffusion_setting.image_width
|
| 409 |
self.previous_height = self.config.settings.lcm_diffusion_setting.image_height
|
|
@@ -428,8 +581,10 @@ class MainWindow(QMainWindow):
|
|
| 428 |
self.width.setCurrentText("512")
|
| 429 |
self.height.setCurrentText("512")
|
| 430 |
self.inference_steps.setValue(4)
|
| 431 |
-
self.guidance.setValue(
|
| 432 |
self.use_openvino_check.setChecked(False)
|
| 433 |
self.seed_check.setChecked(False)
|
| 434 |
-
self.safety_checker.setChecked(
|
| 435 |
self.results_path.setText(FastStableDiffusionPaths().get_results_path())
|
|
|
|
|
|
|
|
|
| 16 |
QToolButton,
|
| 17 |
QFileDialog,
|
| 18 |
)
|
| 19 |
+
from PyQt5 import QtWidgets, QtCore
|
| 20 |
from PyQt5.QtGui import QPixmap, QDesktopServices
|
| 21 |
from PyQt5.QtCore import QSize, QThreadPool, Qt, QUrl
|
| 22 |
|
|
|
|
| 34 |
from context import Context
|
| 35 |
from models.interface_types import InterfaceType
|
| 36 |
from constants import DEVICE
|
| 37 |
+
from frontend.utils import enable_openvino_controls, get_valid_model_id
|
| 38 |
+
from backend.lcm_models import get_available_models
|
| 39 |
+
|
| 40 |
+
# DPI scale fix
|
| 41 |
+
QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling, True)
|
| 42 |
+
QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_UseHighDpiPixmaps, True)
|
| 43 |
|
| 44 |
|
| 45 |
class MainWindow(QMainWindow):
|
| 46 |
def __init__(self, config: AppSettings):
|
| 47 |
super().__init__()
|
| 48 |
+
self.config = config
|
| 49 |
self.setWindowTitle(APP_NAME)
|
| 50 |
+
self.setFixedSize(QSize(600, 670))
|
| 51 |
self.init_ui()
|
| 52 |
self.pipeline = None
|
| 53 |
self.threadpool = QThreadPool()
|
|
|
|
| 54 |
self.device = "cpu"
|
| 55 |
self.previous_width = 0
|
| 56 |
self.previous_height = 0
|
|
|
|
| 94 |
self.num_images.setValue(
|
| 95 |
self.config.settings.lcm_diffusion_setting.number_of_images
|
| 96 |
)
|
| 97 |
+
self.use_tae_sd.setChecked(
|
| 98 |
+
self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder
|
| 99 |
+
)
|
| 100 |
+
self.use_lcm_lora.setChecked(
|
| 101 |
+
self.config.settings.lcm_diffusion_setting.use_lcm_lora
|
| 102 |
+
)
|
| 103 |
+
self.base_model_id.setCurrentText(
|
| 104 |
+
get_valid_model_id(
|
| 105 |
+
self.config.stable_diffsuion_models,
|
| 106 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id,
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
self.lcm_lora_id.setCurrentText(
|
| 110 |
+
get_valid_model_id(
|
| 111 |
+
self.config.lcm_lora_models,
|
| 112 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id,
|
| 113 |
+
)
|
| 114 |
+
)
|
| 115 |
+
self.openvino_lcm_model_id.setCurrentText(
|
| 116 |
+
get_valid_model_id(
|
| 117 |
+
self.config.openvino_lcm_models,
|
| 118 |
+
self.config.settings.lcm_diffusion_setting.openvino_lcm_model_id,
|
| 119 |
+
)
|
| 120 |
+
)
|
| 121 |
+
self.neg_prompt.setEnabled(
|
| 122 |
+
self.config.settings.lcm_diffusion_setting.use_lcm_lora
|
| 123 |
+
or self.config.settings.lcm_diffusion_setting.use_openvino
|
| 124 |
+
)
|
| 125 |
+
self.openvino_lcm_model_id.setEnabled(
|
| 126 |
+
self.config.settings.lcm_diffusion_setting.use_openvino
|
| 127 |
+
)
|
| 128 |
|
| 129 |
def init_ui(self):
|
| 130 |
self.create_main_tab()
|
|
|
|
| 136 |
self.img = QLabel("<<Image>>")
|
| 137 |
self.img.setAlignment(Qt.AlignCenter)
|
| 138 |
self.img.setFixedSize(QSize(512, 512))
|
| 139 |
+
self.vspacer = QSpacerItem(20, 40, QSizePolicy.Minimum, QSizePolicy.Expanding)
|
| 140 |
|
| 141 |
self.prompt = QTextEdit()
|
| 142 |
self.prompt.setPlaceholderText("A fantasy landscape")
|
| 143 |
self.prompt.setAcceptRichText(False)
|
| 144 |
+
self.neg_prompt = QTextEdit()
|
| 145 |
+
self.neg_prompt.setPlaceholderText("")
|
| 146 |
+
self.neg_prompt.setAcceptRichText(False)
|
| 147 |
+
self.neg_prompt_label = QLabel("Negative prompt (Set guidance scale > 1.0):")
|
| 148 |
self.generate = QPushButton("Generate")
|
| 149 |
self.generate.clicked.connect(self.text_to_image)
|
| 150 |
+
self.prompt.setFixedHeight(40)
|
| 151 |
+
self.neg_prompt.setFixedHeight(35)
|
| 152 |
self.browse_results = QPushButton("...")
|
| 153 |
self.browse_results.setFixedWidth(30)
|
| 154 |
self.browse_results.clicked.connect(self.on_open_results_folder)
|
| 155 |
self.browse_results.setToolTip("Open output folder")
|
| 156 |
|
| 157 |
hlayout = QHBoxLayout()
|
| 158 |
+
hlayout.addWidget(self.neg_prompt)
|
| 159 |
hlayout.addWidget(self.generate)
|
| 160 |
hlayout.addWidget(self.browse_results)
|
| 161 |
|
|
|
|
| 172 |
|
| 173 |
vlayout = QVBoxLayout()
|
| 174 |
vlayout.addLayout(hlayout_nav)
|
| 175 |
+
vlayout.addItem(self.vspacer)
|
| 176 |
+
vlayout.addWidget(self.prompt)
|
| 177 |
+
vlayout.addWidget(self.neg_prompt_label)
|
| 178 |
vlayout.addLayout(hlayout)
|
| 179 |
|
| 180 |
self.tab_widget = QTabWidget(self)
|
|
|
|
| 191 |
self.use_seed = False
|
| 192 |
|
| 193 |
def create_settings_tab(self):
|
|
|
|
| 194 |
self.lcm_model_label = QLabel("Latent Consistency Model:")
|
| 195 |
+
# self.lcm_model = QLineEdit(LCM_DEFAULT_MODEL)
|
| 196 |
+
lcm_models = get_available_models()
|
| 197 |
+
self.lcm_model = QComboBox(self)
|
| 198 |
+
for model in lcm_models:
|
| 199 |
+
self.lcm_model.addItem(model)
|
| 200 |
+
|
| 201 |
+
self.use_lcm_lora = QCheckBox("Use LCM LoRA")
|
| 202 |
+
self.use_lcm_lora.setChecked(False)
|
| 203 |
+
self.use_lcm_lora.stateChanged.connect(self.use_lcm_lora_changed)
|
| 204 |
+
|
| 205 |
+
self.lora_base_model_id_label = QLabel("Lora base model ID :")
|
| 206 |
+
self.base_model_id = QComboBox(self)
|
| 207 |
+
self.base_model_id.addItems(self.config.stable_diffsuion_models)
|
| 208 |
+
self.base_model_id.currentIndexChanged.connect(self.on_base_model_id_changed)
|
| 209 |
+
|
| 210 |
+
self.lcm_lora_model_id_label = QLabel("LCM LoRA model ID :")
|
| 211 |
+
self.lcm_lora_id = QComboBox(self)
|
| 212 |
+
self.lcm_lora_id.addItems(self.config.lcm_lora_models)
|
| 213 |
+
self.lcm_lora_id.currentIndexChanged.connect(self.on_lcm_lora_id_changed)
|
| 214 |
|
| 215 |
self.inference_steps_value = QLabel("Number of inference steps: 4")
|
| 216 |
self.inference_steps = QSlider(orientation=Qt.Orientation.Horizontal)
|
|
|
|
| 226 |
self.num_images.setValue(1)
|
| 227 |
self.num_images.valueChanged.connect(self.update_num_images_label)
|
| 228 |
|
| 229 |
+
self.guidance_value = QLabel("Guidance scale: 1")
|
| 230 |
self.guidance = QSlider(orientation=Qt.Orientation.Horizontal)
|
| 231 |
+
self.guidance.setMaximum(20)
|
| 232 |
self.guidance.setMinimum(10)
|
| 233 |
+
self.guidance.setValue(10)
|
| 234 |
self.guidance.valueChanged.connect(self.update_guidance_label)
|
| 235 |
|
| 236 |
self.width_value = QLabel("Width :")
|
|
|
|
| 238 |
self.width.addItem("256")
|
| 239 |
self.width.addItem("512")
|
| 240 |
self.width.addItem("768")
|
| 241 |
+
self.width.addItem("1024")
|
| 242 |
self.width.setCurrentText("512")
|
| 243 |
self.width.currentIndexChanged.connect(self.on_width_changed)
|
| 244 |
|
|
|
|
| 247 |
self.height.addItem("256")
|
| 248 |
self.height.addItem("512")
|
| 249 |
self.height.addItem("768")
|
| 250 |
+
self.height.addItem("1024")
|
| 251 |
self.height.setCurrentText("512")
|
| 252 |
self.height.currentIndexChanged.connect(self.on_height_changed)
|
| 253 |
|
|
|
|
| 263 |
|
| 264 |
self.use_openvino_check = QCheckBox("Use OpenVINO")
|
| 265 |
self.use_openvino_check.setChecked(False)
|
| 266 |
+
self.openvino_model_label = QLabel("OpenVINO LCM model:")
|
| 267 |
self.use_local_model_folder = QCheckBox(
|
| 268 |
"Use locally cached model or downloaded model folder(offline)"
|
| 269 |
)
|
| 270 |
+
self.openvino_lcm_model_id = QComboBox(self)
|
| 271 |
+
self.openvino_lcm_model_id.addItems(self.config.openvino_lcm_models)
|
| 272 |
+
self.openvino_lcm_model_id.currentIndexChanged.connect(
|
| 273 |
+
self.on_openvino_lcm_model_id_changed
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
self.use_openvino_check.setEnabled(enable_openvino_controls())
|
| 277 |
self.use_local_model_folder.setChecked(False)
|
| 278 |
self.use_local_model_folder.stateChanged.connect(self.use_offline_model_changed)
|
| 279 |
self.use_openvino_check.stateChanged.connect(self.use_openvino_changed)
|
| 280 |
|
| 281 |
+
self.use_tae_sd = QCheckBox(
|
| 282 |
+
"Use Tiny Auto Encoder - TAESD (Fast, moderate quality)"
|
| 283 |
+
)
|
| 284 |
+
self.use_tae_sd.setChecked(False)
|
| 285 |
+
self.use_tae_sd.stateChanged.connect(self.use_tae_sd_changed)
|
| 286 |
+
|
| 287 |
hlayout = QHBoxLayout()
|
| 288 |
hlayout.addWidget(self.seed_check)
|
| 289 |
hlayout.addWidget(self.seed_value)
|
|
|
|
| 303 |
vlayout = QVBoxLayout()
|
| 304 |
vspacer = QSpacerItem(20, 20, QSizePolicy.Minimum, QSizePolicy.Expanding)
|
| 305 |
vlayout.addItem(hspacer)
|
| 306 |
+
vlayout.addWidget(self.lcm_model_label)
|
| 307 |
+
vlayout.addWidget(self.lcm_model)
|
| 308 |
vlayout.addWidget(self.use_local_model_folder)
|
| 309 |
+
vlayout.addWidget(self.use_lcm_lora)
|
| 310 |
+
vlayout.addWidget(self.lora_base_model_id_label)
|
| 311 |
+
vlayout.addWidget(self.base_model_id)
|
| 312 |
+
vlayout.addWidget(self.lcm_lora_model_id_label)
|
| 313 |
+
vlayout.addWidget(self.lcm_lora_id)
|
| 314 |
+
vlayout.addWidget(self.use_openvino_check)
|
| 315 |
+
vlayout.addWidget(self.openvino_model_label)
|
| 316 |
+
vlayout.addWidget(self.openvino_lcm_model_id)
|
| 317 |
+
vlayout.addWidget(self.use_tae_sd)
|
| 318 |
vlayout.addItem(slider_hspacer)
|
| 319 |
vlayout.addWidget(self.inference_steps_value)
|
| 320 |
vlayout.addWidget(self.inference_steps)
|
|
|
|
| 328 |
vlayout.addWidget(self.guidance)
|
| 329 |
vlayout.addLayout(hlayout)
|
| 330 |
vlayout.addWidget(self.safety_checker)
|
| 331 |
+
|
| 332 |
vlayout.addWidget(self.results_path_label)
|
| 333 |
hlayout_path = QHBoxLayout()
|
| 334 |
hlayout_path.addWidget(self.results_path)
|
|
|
|
| 357 |
vlayout.addWidget(self.label)
|
| 358 |
self.tab_about.setLayout(vlayout)
|
| 359 |
|
| 360 |
+
def show_image(self, pixmap):
|
| 361 |
+
image_width = self.config.settings.lcm_diffusion_setting.image_width
|
| 362 |
+
image_height = self.config.settings.lcm_diffusion_setting.image_height
|
| 363 |
+
if image_width > 512 or image_height > 512:
|
| 364 |
+
new_width = 512 if image_width > 512 else image_width
|
| 365 |
+
new_height = 512 if image_height > 512 else image_height
|
| 366 |
+
self.img.setPixmap(
|
| 367 |
+
pixmap.scaled(
|
| 368 |
+
new_width,
|
| 369 |
+
new_height,
|
| 370 |
+
Qt.KeepAspectRatio,
|
| 371 |
+
)
|
| 372 |
+
)
|
| 373 |
+
else:
|
| 374 |
+
self.img.setPixmap(pixmap)
|
| 375 |
+
|
| 376 |
def on_show_next_image(self):
|
| 377 |
if self.image_index != len(self.gen_images) - 1 and len(self.gen_images) > 0:
|
| 378 |
self.previous_img_btn.setEnabled(True)
|
| 379 |
self.image_index += 1
|
| 380 |
+
self.show_image(self.gen_images[self.image_index])
|
| 381 |
if self.image_index == len(self.gen_images) - 1:
|
| 382 |
self.next_img_btn.setEnabled(False)
|
| 383 |
|
|
|
|
| 388 |
if self.image_index != 0:
|
| 389 |
self.next_img_btn.setEnabled(True)
|
| 390 |
self.image_index -= 1
|
| 391 |
+
self.show_image(self.gen_images[self.image_index])
|
| 392 |
if self.image_index == 0:
|
| 393 |
self.previous_img_btn.setEnabled(False)
|
| 394 |
|
|
|
|
| 415 |
height_txt = self.height.itemText(index)
|
| 416 |
self.config.settings.lcm_diffusion_setting.image_height = int(height_txt)
|
| 417 |
|
| 418 |
+
def on_base_model_id_changed(self, index):
|
| 419 |
+
model_id = self.base_model_id.itemText(index)
|
| 420 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id = model_id
|
| 421 |
+
|
| 422 |
+
def on_lcm_lora_id_changed(self, index):
|
| 423 |
+
model_id = self.lcm_lora_id.itemText(index)
|
| 424 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id = model_id
|
| 425 |
+
|
| 426 |
+
def on_openvino_lcm_model_id_changed(self, index):
|
| 427 |
+
model_id = self.openvino_lcm_model_id.itemText(index)
|
| 428 |
+
self.config.settings.lcm_diffusion_setting.openvino_lcm_model_id = model_id
|
| 429 |
+
|
| 430 |
def use_openvino_changed(self, state):
|
| 431 |
if state == 2:
|
| 432 |
self.lcm_model.setEnabled(False)
|
| 433 |
+
self.use_lcm_lora.setEnabled(False)
|
| 434 |
+
self.lcm_lora_id.setEnabled(False)
|
| 435 |
+
self.base_model_id.setEnabled(False)
|
| 436 |
+
self.neg_prompt.setEnabled(True)
|
| 437 |
+
self.openvino_lcm_model_id.setEnabled(True)
|
| 438 |
self.config.settings.lcm_diffusion_setting.use_openvino = True
|
| 439 |
else:
|
| 440 |
+
self.lcm_model.setEnabled(True)
|
| 441 |
+
self.use_lcm_lora.setEnabled(True)
|
| 442 |
+
self.lcm_lora_id.setEnabled(True)
|
| 443 |
+
self.base_model_id.setEnabled(True)
|
| 444 |
+
self.neg_prompt.setEnabled(False)
|
| 445 |
+
self.openvino_lcm_model_id.setEnabled(False)
|
| 446 |
self.config.settings.lcm_diffusion_setting.use_openvino = False
|
| 447 |
|
| 448 |
+
def use_tae_sd_changed(self, state):
|
| 449 |
+
if state == 2:
|
| 450 |
+
self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder = True
|
| 451 |
+
else:
|
| 452 |
+
self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder = False
|
| 453 |
+
|
| 454 |
def use_offline_model_changed(self, state):
|
| 455 |
if state == 2:
|
| 456 |
self.config.settings.lcm_diffusion_setting.use_offline_model = True
|
| 457 |
else:
|
| 458 |
self.config.settings.lcm_diffusion_setting.use_offline_model = False
|
| 459 |
|
| 460 |
+
def use_lcm_lora_changed(self, state):
|
| 461 |
+
if state == 2:
|
| 462 |
+
self.lcm_model.setEnabled(False)
|
| 463 |
+
self.lcm_lora_id.setEnabled(True)
|
| 464 |
+
self.base_model_id.setEnabled(True)
|
| 465 |
+
self.neg_prompt.setEnabled(True)
|
| 466 |
+
self.config.settings.lcm_diffusion_setting.use_lcm_lora = True
|
| 467 |
+
else:
|
| 468 |
+
self.lcm_model.setEnabled(True)
|
| 469 |
+
self.lcm_lora_id.setEnabled(False)
|
| 470 |
+
self.base_model_id.setEnabled(False)
|
| 471 |
+
self.neg_prompt.setEnabled(False)
|
| 472 |
+
self.config.settings.lcm_diffusion_setting.use_lcm_lora = False
|
| 473 |
+
|
| 474 |
def use_safety_checker_changed(self, state):
|
| 475 |
if state == 2:
|
| 476 |
self.config.settings.lcm_diffusion_setting.use_safety_checker = True
|
|
|
|
| 506 |
def generate_image(self):
|
| 507 |
self.config.settings.lcm_diffusion_setting.seed = self.get_seed_value()
|
| 508 |
self.config.settings.lcm_diffusion_setting.prompt = self.prompt.toPlainText()
|
| 509 |
+
self.config.settings.lcm_diffusion_setting.negative_prompt = (
|
| 510 |
+
self.neg_prompt.toPlainText()
|
| 511 |
+
)
|
| 512 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id = (
|
| 513 |
+
self.lcm_lora_id.currentText()
|
| 514 |
+
)
|
| 515 |
+
self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id = (
|
| 516 |
+
self.base_model_id.currentText()
|
| 517 |
+
)
|
| 518 |
|
| 519 |
if self.config.settings.lcm_diffusion_setting.use_openvino:
|
| 520 |
+
model_id = self.openvino_lcm_model_id.currentText()
|
| 521 |
else:
|
| 522 |
+
model_id = self.lcm_model.currentText()
|
| 523 |
|
| 524 |
self.config.settings.lcm_diffusion_setting.lcm_model_id = model_id
|
| 525 |
|
|
|
|
| 556 |
self.next_img_btn.setEnabled(False)
|
| 557 |
self.previous_img_btn.setEnabled(False)
|
| 558 |
|
| 559 |
+
self.show_image(self.gen_images[0])
|
| 560 |
|
| 561 |
self.previous_width = self.config.settings.lcm_diffusion_setting.image_width
|
| 562 |
self.previous_height = self.config.settings.lcm_diffusion_setting.image_height
|
|
|
|
| 581 |
self.width.setCurrentText("512")
|
| 582 |
self.height.setCurrentText("512")
|
| 583 |
self.inference_steps.setValue(4)
|
| 584 |
+
self.guidance.setValue(10)
|
| 585 |
self.use_openvino_check.setChecked(False)
|
| 586 |
self.seed_check.setChecked(False)
|
| 587 |
+
self.safety_checker.setChecked(False)
|
| 588 |
self.results_path.setText(FastStableDiffusionPaths().get_results_path())
|
| 589 |
+
self.use_tae_sd.setChecked(False)
|
| 590 |
+
self.use_lcm_lora.setChecked(False)
|
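In the Qt settings tab above, the guidance QSlider works in integers from 10 to 20 while the stored setting is a float from 1.0 to 2.0, so the connected handler presumably divides the slider value by 10. The body of update_guidance_label is not visible in this diff, so the following is only an assumed sketch of that mapping:

def update_guidance_label(self, value: int) -> None:
    # Assumed mapping: slider 10..20 -> guidance scale 1.0..2.0
    guidance = value / 10.0
    self.guidance_value.setText(f"Guidance scale: {guidance}")
    self.config.settings.lcm_diffusion_setting.guidance_scale = guidance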
frontend/utils.py
CHANGED
@@ -1,5 +1,7 @@
 from constants import DEVICE
+from typing import List
 import platform
+from backend.device import is_openvino_device
 
 
 def is_reshape_required(
@@ -12,9 +14,6 @@ def is_reshape_required(
     prev_num_of_images: int,
     cur_num_of_images: int,
 ) -> bool:
-    print(f"width - {prev_width} {cur_width}")
-    print(f"height - {prev_height} {cur_height}")
-    print(f"model - {prev_model} {cur_model}")
     reshape_required = False
     if (
         prev_width != cur_width
@@ -29,4 +28,23 @@
 
 
 def enable_openvino_controls() -> bool:
-    return
+    return is_openvino_device() and platform.system().lower() != "darwin"
+
+
+def get_valid_model_id(
+    models: List,
+    model_id: str,
+) -> str:
+    if len(models) == 0:
+        print("Error: model configuration file is empty,please add some models.")
+        return ""
+    if model_id == "":
+        return models[0]
+
+    if model_id in models:
+        return model_id
+    else:
+        print(
+            f"Error:{model_id} Model not found in configuration file,so using first model : {models[0]}"
+        )
+        return models[0]
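get_valid_model_id falls back to the first configured model whenever the saved ID is empty or unknown, which keeps the GUI combo boxes valid after a settings file edit. For example (the repo IDs below are illustrative):

from frontend.utils import get_valid_model_id

models = [
    "rupeshs/LCM-dreamshaper-v7-openvino",
    "deinferno/LCM_Dreamshaper_v7-openvino",
]
print(get_valid_model_id(models, ""))               # first entry
print(get_valid_model_id(models, models[1]))        # saved entry, unchanged
print(get_valid_model_id(models, "unknown/repo"))   # warns, falls back to first entry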
frontend/webui/text_to_image_ui.py
CHANGED
|
@@ -10,19 +10,6 @@ from frontend.utils import is_reshape_required
|
|
| 10 |
from app_settings import AppSettings
|
| 11 |
from constants import DEVICE
|
| 12 |
from frontend.utils import enable_openvino_controls
|
| 13 |
-
from scipy.ndimage import zoom
|
| 14 |
-
import numpy as np
|
| 15 |
-
from PIL import Image
|
| 16 |
-
from super_image import CarnModel, ImageLoader
|
| 17 |
-
from torchvision import transforms
|
| 18 |
-
|
| 19 |
-
transform_image = transforms.ToPILImage()
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def tensor2img(tensor):
|
| 23 |
-
tensor = tensor.squeeze(0).cpu().clamp(0, 1)
|
| 24 |
-
return transform_image(tensor)
|
| 25 |
-
|
| 26 |
|
| 27 |
random_enabled = True
|
| 28 |
|
|
@@ -31,16 +18,19 @@ previous_width = 0
|
|
| 31 |
previous_height = 0
|
| 32 |
previous_model_id = ""
|
| 33 |
previous_num_of_images = 0
|
| 34 |
-
upscaler = CarnModel.from_pretrained("eugenesiow/carn-bam", scale=2)
|
| 35 |
|
| 36 |
|
| 37 |
def generate_text_to_image(
|
| 38 |
prompt,
|
|
|
|
|
|
|
| 39 |
inference_steps,
|
| 40 |
guidance_scale,
|
|
|
|
| 41 |
seed,
|
| 42 |
use_openvino,
|
| 43 |
use_safety_checker,
|
|
|
|
| 44 |
) -> Any:
|
| 45 |
global previous_height, previous_width, previous_model_id, previous_num_of_images
|
| 46 |
model_id = LCM_DEFAULT_MODEL
|
|
@@ -52,15 +42,16 @@ def generate_text_to_image(
|
|
| 52 |
lcm_diffusion_settings = LCMDiffusionSetting(
|
| 53 |
lcm_model_id=model_id,
|
| 54 |
prompt=prompt,
|
| 55 |
-
image_height=
|
| 56 |
-
image_width=
|
| 57 |
inference_steps=inference_steps,
|
| 58 |
guidance_scale=guidance_scale,
|
| 59 |
-
number_of_images=
|
| 60 |
seed=seed,
|
| 61 |
use_openvino=use_openvino,
|
| 62 |
use_safety_checker=use_safety_checker,
|
| 63 |
use_seed=use_seed,
|
|
|
|
| 64 |
)
|
| 65 |
settings = Settings(
|
| 66 |
lcm_diffusion_setting=lcm_diffusion_settings,
|
|
@@ -69,30 +60,23 @@ def generate_text_to_image(
|
|
| 69 |
if use_openvino:
|
| 70 |
reshape = is_reshape_required(
|
| 71 |
previous_width,
|
| 72 |
-
|
| 73 |
previous_height,
|
| 74 |
-
|
| 75 |
previous_model_id,
|
| 76 |
model_id,
|
| 77 |
previous_num_of_images,
|
| 78 |
-
|
| 79 |
)
|
| 80 |
images = context.generate_text_to_image(
|
| 81 |
settings,
|
| 82 |
reshape,
|
| 83 |
DEVICE,
|
| 84 |
)
|
| 85 |
-
previous_width =
|
| 86 |
-
previous_height =
|
| 87 |
previous_model_id = model_id
|
| 88 |
-
previous_num_of_images =
|
| 89 |
-
out_images = []
|
| 90 |
-
# for image in images:
|
| 91 |
-
# out_images.append(image.resize((768, 768), resample=Image.LANCZOS))
|
| 92 |
-
# # in_image = ImageLoader.load_image(image)
|
| 93 |
-
# # up_image = upscaler(in_image)
|
| 94 |
-
# # out_images.append(tensor2img(up_image))
|
| 95 |
-
# # out_images(image)
|
| 96 |
|
| 97 |
return images
|
| 98 |
|
|
@@ -124,10 +108,25 @@ def get_text_to_image_ui(app_settings: AppSettings) -> None:
|
|
| 124 |
elem_id="generate_button",
|
| 125 |
scale=0,
|
| 126 |
)
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
with gr.Accordion("Advanced options", open=False):
|
| 129 |
guidance_scale = gr.Slider(
|
| 130 |
-
1.0,
|
| 131 |
)
|
| 132 |
|
| 133 |
seed = gr.Number(
|
|
@@ -144,8 +143,8 @@ def get_text_to_image_ui(app_settings: AppSettings) -> None:
|
|
| 144 |
|
| 145 |
openvino_checkbox = gr.Checkbox(
|
| 146 |
label="Use OpenVINO",
|
| 147 |
-
value=
|
| 148 |
-
interactive=
|
| 149 |
)
|
| 150 |
|
| 151 |
safety_checker_checkbox = gr.Checkbox(
|
|
@@ -153,30 +152,23 @@ def get_text_to_image_ui(app_settings: AppSettings) -> None:
|
|
| 153 |
value=True,
|
| 154 |
interactive=True,
|
| 155 |
)
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
| 158 |
)
|
| 159 |
-
# image_height = gr.Slider(
|
| 160 |
-
# 256, 768, value=384, step=64, label="Image Height",interactive=Fa
|
| 161 |
-
# )
|
| 162 |
-
# image_width = gr.Slider(
|
| 163 |
-
# 256, 768, value=384, step=64, label="Image Width"
|
| 164 |
-
# )
|
| 165 |
-
# num_images = gr.Slider(
|
| 166 |
-
# 1,
|
| 167 |
-
# 50,
|
| 168 |
-
# value=1,
|
| 169 |
-
# step=1,
|
| 170 |
-
# label="Number of images to generate",
|
| 171 |
-
# )
|
| 172 |
|
| 173 |
input_params = [
|
| 174 |
prompt,
|
|
|
|
|
|
|
| 175 |
num_inference_steps,
|
| 176 |
guidance_scale,
|
|
|
|
| 177 |
seed,
|
| 178 |
openvino_checkbox,
|
| 179 |
safety_checker_checkbox,
|
|
|
|
| 180 |
]
|
| 181 |
|
| 182 |
with gr.Column():
|
|
|
|
| 10 |
from app_settings import AppSettings
|
| 11 |
from constants import DEVICE
|
| 12 |
from frontend.utils import enable_openvino_controls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
random_enabled = True
|
| 15 |
|
|
|
|
| 18 |
previous_height = 0
|
| 19 |
previous_model_id = ""
|
| 20 |
previous_num_of_images = 0
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def generate_text_to_image(
|
| 24 |
prompt,
|
| 25 |
+
image_height,
|
| 26 |
+
image_width,
|
| 27 |
inference_steps,
|
| 28 |
guidance_scale,
|
| 29 |
+
num_images,
|
| 30 |
seed,
|
| 31 |
use_openvino,
|
| 32 |
use_safety_checker,
|
| 33 |
+
tiny_auto_encoder_checkbox,
|
| 34 |
) -> Any:
|
| 35 |
global previous_height, previous_width, previous_model_id, previous_num_of_images
|
| 36 |
model_id = LCM_DEFAULT_MODEL
|
|
|
|
| 42 |
lcm_diffusion_settings = LCMDiffusionSetting(
|
| 43 |
lcm_model_id=model_id,
|
| 44 |
prompt=prompt,
|
| 45 |
+
image_height=image_height,
|
| 46 |
+
image_width=image_width,
|
| 47 |
inference_steps=inference_steps,
|
| 48 |
guidance_scale=guidance_scale,
|
| 49 |
+
number_of_images=num_images,
|
| 50 |
seed=seed,
|
| 51 |
use_openvino=use_openvino,
|
| 52 |
use_safety_checker=use_safety_checker,
|
| 53 |
use_seed=use_seed,
|
| 54 |
+
use_tiny_auto_encoder=tiny_auto_encoder_checkbox,
|
| 55 |
)
|
| 56 |
settings = Settings(
|
| 57 |
lcm_diffusion_setting=lcm_diffusion_settings,
|
|
|
|
| 60 |
if use_openvino:
|
| 61 |
reshape = is_reshape_required(
|
| 62 |
previous_width,
|
| 63 |
+
image_width,
|
| 64 |
previous_height,
|
| 65 |
+
image_height,
|
| 66 |
previous_model_id,
|
| 67 |
model_id,
|
| 68 |
previous_num_of_images,
|
| 69 |
+
num_images,
|
| 70 |
)
|
| 71 |
images = context.generate_text_to_image(
|
| 72 |
settings,
|
| 73 |
reshape,
|
| 74 |
DEVICE,
|
| 75 |
)
|
| 76 |
+
previous_width = image_width
|
| 77 |
+
previous_height = image_height
|
| 78 |
previous_model_id = model_id
|
| 79 |
+
previous_num_of_images = num_images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return images
|
| 82 |
|
|
|
|
| 108 |
elem_id="generate_button",
|
| 109 |
scale=0,
|
| 110 |
)
|
| 111 |
+
num_inference_steps = gr.Slider(
|
| 112 |
+
1, 25, value=4, step=1, label="Inference Steps"
|
| 113 |
+
)
|
| 114 |
+
image_height = gr.Slider(
|
| 115 |
+
256, 768, value=512, step=256, label="Image Height"
|
| 116 |
+
)
|
| 117 |
+
image_width = gr.Slider(
|
| 118 |
+
256, 768, value=512, step=256, label="Image Width"
|
| 119 |
+
)
|
| 120 |
+
num_images = gr.Slider(
|
| 121 |
+
1,
|
| 122 |
+
50,
|
| 123 |
+
value=1,
|
| 124 |
+
step=1,
|
| 125 |
+
label="Number of images to generate",
|
| 126 |
+
)
|
| 127 |
with gr.Accordion("Advanced options", open=False):
|
| 128 |
guidance_scale = gr.Slider(
|
| 129 |
+
1.0, 2.0, value=1.0, step=0.5, label="Guidance Scale"
|
| 130 |
)
|
| 131 |
|
| 132 |
seed = gr.Number(
|
|
|
|
| 143 |
|
| 144 |
openvino_checkbox = gr.Checkbox(
|
| 145 |
label="Use OpenVINO",
|
| 146 |
+
value=False,
|
| 147 |
+
interactive=enable_openvino_controls(),
|
| 148 |
)
|
| 149 |
|
| 150 |
safety_checker_checkbox = gr.Checkbox(
|
|
|
|
| 152 |
value=True,
|
| 153 |
interactive=True,
|
| 154 |
)
|
| 155 |
+
tiny_auto_encoder_checkbox = gr.Checkbox(
|
| 156 |
+
label="Use tiny auto encoder for SD",
|
| 157 |
+
value=False,
|
| 158 |
+
interactive=True,
|
| 159 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
input_params = [
|
| 162 |
prompt,
|
| 163 |
+
image_height,
|
| 164 |
+
image_width,
|
| 165 |
num_inference_steps,
|
| 166 |
guidance_scale,
|
| 167 |
+
num_images,
|
| 168 |
seed,
|
| 169 |
openvino_checkbox,
|
| 170 |
safety_checker_checkbox,
|
| 171 |
+
tiny_auto_encoder_checkbox,
|
| 172 |
]
|
| 173 |
|
| 174 |
with gr.Column():
|
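The web UI passes the new height, width, and image-count sliders through input_params into generate_text_to_image. The click wiring itself is not visible in this view; it would follow the usual Gradio pattern, sketched here with assumed variable names for the button and output gallery:

import gradio as gr

# Sketch only: "generate_btn" and "output" stand in for the button created with
# elem_id="generate_button" and the result gallery defined elsewhere in this file;
# input_params is the list built above.
output = gr.Gallery(label="Generated images", show_label=True)
generate_btn.click(
    fn=generate_text_to_image,
    inputs=input_params,
    outputs=output,
)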
frontend/webui/ui.py
CHANGED
@@ -6,7 +6,7 @@ from app_settings import AppSettings
 
 
 def _get_footer_message() -> str:
-    version = f"<center><p>
+    version = f"<center><p> {APP_VERSION} "
     footer_msg = version + (
         ' © 2023 <a href="https://github.com/rupeshs">'
         " Rupesh Sreeraman</a></p></center>"
@@ -19,7 +19,7 @@ def get_web_ui(app_settings: AppSettings) -> gr.Blocks:
         css=FastStableDiffusionPaths.get_css_path(),
         title="FastSD CPU",
     ) as fastsd_web_ui:
-        gr.HTML("<center><H1>FastSD CPU
+        gr.HTML("<center><H1>FastSD CPU</H1></center>")
         with gr.Tabs():
             with gr.TabItem("Text to Image"):
                 get_text_to_image_ui(app_settings)
models/__pycache__/interface_types.cpython-311.pyc
CHANGED
Binary files a/models/__pycache__/interface_types.cpython-311.pyc and b/models/__pycache__/interface_types.cpython-311.pyc differ

models/__pycache__/settings.cpython-311.pyc
CHANGED
Binary files a/models/__pycache__/settings.cpython-311.pyc and b/models/__pycache__/settings.cpython-311.pyc differ

models/settings.py
CHANGED
@@ -1,8 +1,8 @@
 from pydantic import BaseModel
-from backend.models.lcmdiffusion_setting import LCMDiffusionSetting
+from backend.models.lcmdiffusion_setting import LCMDiffusionSetting, LCMLora
 from paths import FastStableDiffusionPaths
 
 
 class Settings(BaseModel):
     results_path: str = FastStableDiffusionPaths().get_results_path()
-    lcm_diffusion_setting: LCMDiffusionSetting = LCMDiffusionSetting()
+    lcm_diffusion_setting: LCMDiffusionSetting = LCMDiffusionSetting(lcm_lora=LCMLora())
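Settings now nests an explicit LCMLora default. Persisting the whole model to the settings.yaml named in constants is straightforward with pydantic v2 plus PyYAML; this is a sketch of what the app settings layer presumably does, not code from this commit:

import yaml
from models.settings import Settings

settings = Settings()
with open("settings.yaml", "w") as f:
    # model_dump() gives a plain dict, including the nested lcm_lora fields.
    yaml.safe_dump(settings.model_dump(), f)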
paths.py
CHANGED
@@ -9,7 +9,7 @@ def join_paths(
     return os.path.join(first_path, second_path)
 
 
-def get_app_path():
+def get_app_path() -> str:
     app_dir = os.path.dirname(__file__)
     work_dir = os.path.dirname(app_dir)
     return work_dir
@@ -36,7 +36,7 @@ class FastStableDiffusionPaths:
         return results_path
 
     @staticmethod
-    def get_css_path():
+    def get_css_path() -> str:
         app_dir = os.path.dirname(__file__)
         css_path = os.path.join(
             app_dir,
@@ -46,3 +46,12 @@
             "style.css",
         )
         return css_path
+
+    @staticmethod
+    def get_models_config_path(model_config_file: str) -> str:
+        configs_path = get_configs_path()
+        models_path = join_paths(
+            configs_path,
+            model_config_file,
+        )
+        return models_path
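The new helper simply joins the configs directory with one of the model-list file names from constants; the exact base directory depends on get_configs_path. For example:

from paths import FastStableDiffusionPaths
from constants import OPENVINO_LCM_MODELS_FILE

# e.g. <app path>/configs/openvino-lcm-models.txt
path = FastStableDiffusionPaths.get_models_config_path(OPENVINO_LCM_MODELS_FILE)
print(path)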
utils.py
CHANGED
@@ -1,4 +1,5 @@
 import platform
+from typing import List
 
 
 def show_system_info():
@@ -8,3 +9,13 @@ def show_system_info():
         print(f"Processor: {platform.processor()}")
     except Exception as ex:
         print(f"Error ocurred while getting system information {ex}")
+
+
+def get_models_from_text_file(file_path: str) -> List:
+    models = []
+    with open(file_path, "r") as file:
+        lines = file.readlines()
+        for repo_id in lines:
+            if repo_id.strip() != "":
+                models.append(repo_id.strip())
+    return models
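get_models_from_text_file reads one repo ID per line and skips blank lines; paired with the paths helper above, it produces the model lists shown in the GUI combo boxes. The file contents in this sketch are illustrative:

from utils import get_models_from_text_file
from paths import FastStableDiffusionPaths
from constants import LCM_LORA_MODELS_FILE

# lcm-lora-models.txt holds one Hugging Face repo ID per line, e.g.
#   latent-consistency/lcm-lora-sdv1-5
#   latent-consistency/lcm-lora-sdxl
lora_models = get_models_from_text_file(
    FastStableDiffusionPaths.get_models_config_path(LCM_LORA_MODELS_FILE)
)
print(lora_models)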