Spaces:

character-aware-diffusion
/

charred

Runtime error

App Files Files Community

Contrebande Labs committed on Apr 29, 2023

Commit

b07346d

1 Parent(s): e0cb68e

put CPU offloading and half precision back

Browse files

Files changed (1) hide show

app.py +18 -13

app.py CHANGED Viewed

@@ -18,7 +18,6 @@ from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration
 def get_inference_lambda(seed):
     tokenizer = ByT5Tokenizer()
     language_model = FlaxT5ForConditionalGeneration.from_pretrained(
@@ -53,17 +52,17 @@ def get_inference_lambda(seed):
         }
     )
     timesteps = 20
-    guidance_scale = jnp.array([7.5], dtype=jnp.float32)
     unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
         "character-aware-diffusion/charred",
-        dtype=jnp.float32,
     )
     vae, vae_params = FlaxAutoencoderKL.from_pretrained(
         "flax/stable-diffusion-2-1",
         subfolder="vae",
-        dtype=jnp.float32,
     )
     vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
@@ -71,14 +70,13 @@ def get_inference_lambda(seed):
     # Generating latent shape
     latent_shape = (
-        negative_prompt_text_encoder_hidden_states.shape[0],
         unet.in_channels,
         image_width // vae_scale_factor,
         image_height // vae_scale_factor,
     )
     def __tokenize_prompt(prompt: str):
         return tokenizer(
             text=prompt,
             max_length=1024,
@@ -91,20 +89,21 @@ def get_inference_lambda(seed):
         # create PIL image from JAX tensor converted to numpy
         return Image.fromarray(np.asarray(image), mode="RGB")
-    def __predict_image(tokenized_prompt: jnp.array):
         # Get the text embedding
         text_encoder_hidden_states = text_encoder(
             tokenized_prompt,
             params=text_encoder_params,
             train=False,
         )[0]
-        context = jnp.concatenate(
             [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
         )
         def ___timestep(step, step_args):
             latents, scheduler_state = step_args
             t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]
@@ -153,7 +152,7 @@ def get_inference_lambda(seed):
         # initialize latents
         initial_latents = (
             jax.random.normal(
-                jax.random.PRNGKey(seed), shape=latent_shape, dtype=jnp.float32
             )
             * initial_scheduler_state.init_noise_sigma
         )
@@ -175,10 +174,16 @@ def get_inference_lambda(seed):
             .astype(jnp.uint8)[0]
         )
-    jax_jit_compiled_predict_image = jax.jit(__predict_image)
     return lambda prompt: __convert_image(
-        jax_jit_compiled_predict_image(__tokenize_prompt(prompt))
     )

 def get_inference_lambda(seed):
     tokenizer = ByT5Tokenizer()
     language_model = FlaxT5ForConditionalGeneration.from_pretrained(
         }
     )
     timesteps = 20
+    guidance_scale = jnp.array([7.5], dtype=jnp.bfloat16)
     unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
         "character-aware-diffusion/charred",
+        dtype=jnp.bfloat16,
     )
     vae, vae_params = FlaxAutoencoderKL.from_pretrained(
         "flax/stable-diffusion-2-1",
         subfolder="vae",
+        dtype=jnp.bfloat16,
     )
     vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
     # Generating latent shape
     latent_shape = (
+        negative_prompt_text_encoder_hidden_states.shape[0],  # is th
         unet.in_channels,
         image_width // vae_scale_factor,
         image_height // vae_scale_factor,
     )
     def __tokenize_prompt(prompt: str):
         return tokenizer(
             text=prompt,
             max_length=1024,
         # create PIL image from JAX tensor converted to numpy
         return Image.fromarray(np.asarray(image), mode="RGB")
+    def __get_context(tokenized_prompt: jnp.array):
         # Get the text embedding
         text_encoder_hidden_states = text_encoder(
             tokenized_prompt,
             params=text_encoder_params,
             train=False,
         )[0]
+        # context = empty negative prompt embedding + prompt embedding
+        return jnp.concatenate(
             [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
         )
+    def __predict_image(context: jnp.array):
         def ___timestep(step, step_args):
             latents, scheduler_state = step_args
             t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]
         # initialize latents
         initial_latents = (
             jax.random.normal(
+                jax.random.PRNGKey(seed), shape=latent_shape, dtype=jnp.bfloat16
             )
             * initial_scheduler_state.init_noise_sigma
         )
             .astype(jnp.uint8)[0]
         )
+    jax_jit_compiled_accel_predict_image = jax.jit(__predict_image)
+    jax_jit_compiled_cpu_get_context = jax.jit(
+        __get_context, device=jax.devices(backend="cpu")[0]
+    )
     return lambda prompt: __convert_image(
+        jax_jit_compiled_accel_predict_image(
+            jax_jit_compiled_cpu_get_context(__tokenize_prompt(prompt))
+        )
     )