Commit e0cb68e · Contrebande Labs
Parent: 06f2eaf
sync with working jax inference code from main repo
app.py CHANGED
@@ -16,6 +16,7 @@ from diffusers import (
 
 from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration
 
+
 def get_inference_lambda(seed):
 
     tokenizer = ByT5Tokenizer()
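
For context on these imports: a minimal sketch of how the encoder hidden states used later in this file could be produced with the ByT5 classes (the google/byt5-base checkpoint and max_length=1024 are illustrative assumptions, not values taken from this diff):

from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration

tokenizer = ByT5Tokenizer()  # byte-level tokenizer, needs no vocab file
text_encoder = FlaxT5ForConditionalGeneration.from_pretrained("google/byt5-base")  # assumed checkpoint

input_ids = tokenizer(
    "a postage stamp from california",
    padding="max_length",
    max_length=1024,  # assumed padding length
    truncation=True,
    return_tensors="jax",
).input_ids
hidden_states = text_encoder.encode(input_ids=input_ids).last_hidden_state
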
@@ -51,7 +52,7 @@ def get_inference_lambda(seed):
             "trained_betas": None,
         }
     )
-    timesteps =
+    timesteps = 20
     guidance_scale = jnp.array([7.5], dtype=jnp.float32)
 
     unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
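
guidance_scale drives classifier-free guidance: __predict_image (below) runs the UNet on a batch that stacks the negative-prompt and prompt encodings, then blends the two predictions. A minimal sketch of that blend, with a hypothetical helper name:

import jax.numpy as jnp

def guide(unet_prediction, guidance_scale):
    # the batch stacks [unconditional, text-conditioned] predictions,
    # matching the context concatenation built in __predict_image
    uncond, text = jnp.split(unet_prediction, 2, axis=0)
    return uncond + guidance_scale * (text - uncond)
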
@@ -68,7 +69,13 @@ def get_inference_lambda(seed):
 
     image_width = image_height = 256
 
-
+    # Generating latent shape
+    latent_shape = (
+        negative_prompt_text_encoder_hidden_states.shape[0],
+        unet.in_channels,
+        image_width // vae_scale_factor,
+        image_height // vae_scale_factor,
+    )
 
     def __tokenize_prompt(prompt: str):
 
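
With Stable-Diffusion-style defaults (assuming unet.in_channels == 4 and vae_scale_factor == 8; neither value appears in this diff), the hoisted shape works out to:

# one negative prompt in the batch, 4 latent channels, 256 // 8 spatial size
latent_shape = (1, 4, 256 // 8, 256 // 8)  # -> (1, 4, 32, 32)
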
@@ -78,15 +85,11 @@ def get_inference_lambda(seed):
             padding="max_length",
             truncation=True,
             return_tensors="jax",
-        ).input_ids
+        ).input_ids
 
-    def __convert_image(
-
-        return
-        # return [
-        #     Image.fromarray(image)
-        #     for image in (np.asarray(vae_output) * 255).round().astype(np.uint8)
-        # ]
+    def __convert_image(image):
+        # create PIL image from JAX tensor converted to numpy
+        return Image.fromarray(np.asarray(image), mode="RGB")
 
     def __predict_image(tokenized_prompt: jnp.array):
 
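
The rewritten __convert_image expects an (height, width, 3) uint8 array, which PIL maps directly to an RGB image; a quick self-contained check of that contract:

import numpy as np
from PIL import Image

dummy = np.zeros((256, 256, 3), dtype=np.uint8)  # all-black test image
Image.fromarray(dummy, mode="RGB").save("black.png")
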
@@ -99,14 +102,6 @@ def get_inference_lambda(seed):
         context = jnp.concatenate(
             [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
         )
-        jax.debug.print("got text encoding...")
-
-        latent_shape = (
-            tokenized_prompt.shape[0],
-            unet.in_channels,
-            image_width // vae_scale_factor,
-            image_height // vae_scale_factor,
-        )
 
         def ___timestep(step, step_args):
 
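
Hoisting latent_shape out of __predict_image is safe under jax.jit (applied below) because the shape is plain Python data: values closed over at trace time are baked into the compiled program. An illustration of that capture, with a hypothetical function:

import jax

shape = (1, 4, 32, 32)  # stand-in for latent_shape

@jax.jit
def make_noise(key):
    return jax.random.normal(key, shape)  # shape captured at trace time

print(make_noise(jax.random.PRNGKey(0)).shape)  # (1, 4, 32, 32)
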
@@ -148,15 +143,12 @@ def get_inference_lambda(seed):
                 scheduler_state, guided_unet_prediction_sample, t, latents
             ).to_tuple()
 
-            jax.debug.print("did one step...")
-
             return latents, scheduler_state
 
         # initialize scheduler state
         initial_scheduler_state = scheduler.set_timesteps(
             scheduler.create_state(), num_inference_steps=timesteps, shape=latent_shape
         )
-        jax.debug.print("initialized scheduler state...")
 
         # initialize latents
         initial_latents = (
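
___timestep returns (latents, scheduler_state), the carry that jax.lax.fori_loop threads through all timesteps iterations below; the pattern in miniature:

import jax

def body(step, carry):
    value, state = carry         # unpack the running carry
    return value + state, state  # stand-in for one denoising update

final, _ = jax.lax.fori_loop(0, 20, body, (0, 1))
print(final)  # 20
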
@@ -165,49 +157,33 @@ def get_inference_lambda(seed):
             )
             * initial_scheduler_state.init_noise_sigma
         )
-        jax.debug.print("initialized latents...")
 
         final_latents, _ = jax.lax.fori_loop(
             0, timesteps, ___timestep, (initial_latents, initial_scheduler_state)
         )
-        jax.debug.print("got final latents...")
-
-        # scale and decode the image latents with vae
-        image = (
-            (
-                vae.apply(
-                    {"params": vae_params},
-                    1 / vae.config.scaling_factor * final_latents,
-                    method=vae.decode,
-                ).sample
-                / 2
-                + 0.5
-            )
-            .clip(0, 1)
-            .transpose(0, 2, 3, 1)
-        )
-        jax.debug.print("got vae processed image output...")
 
-
-
+        vae_output = vae.apply(
+            {"params": vae_params},
+            1 / vae.config.scaling_factor * final_latents,
+            method=vae.decode,
+        ).sample
+
+        # return 8 bit RGB image (width, height, rgb)
+        return (
+            ((vae_output / 2 + 0.5).transpose(0, 2, 3, 1).clip(0, 1) * 255)
+            .round()
+            .astype(jnp.uint8)[0]
+        )
 
-
+    jax_jit_compiled_predict_image = jax.jit(__predict_image)
 
     return lambda prompt: __convert_image(
-
+        jax_jit_compiled_predict_image(__tokenize_prompt(prompt))
     )
 
 
 generate_image_for_prompt = get_inference_lambda(87)
 
-print(f"JAX devices: {jax.devices()}")
-print(f"JAX device type: {jax.devices()[0].device_kind}")
-
-def infer_charred(prompt):
-    # your inference function for charr stable difusion control
-    generate_image_for_prompt(prompt)
-    return None
-
 
 with gr.Blocks(theme="gradio/soft") as demo:
 
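
The new decode path turns the VAE's NCHW float output (roughly in [-1, 1]) into an 8-bit NHWC image in one chain; the same arithmetic on a dummy array:

import jax.numpy as jnp

vae_output = jnp.zeros((1, 3, 256, 256), dtype=jnp.float32)  # fake decoder output
image = (
    ((vae_output / 2 + 0.5).transpose(0, 2, 3, 1).clip(0, 1) * 255)
    .round()
    .astype(jnp.uint8)[0]
)
print(image.shape, image.dtype)  # (256, 256, 3) uint8

Wrapping __predict_image in jax.jit means the first prompt pays the compilation cost; later prompts with the same tokenized shape reuse the compiled program.
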
@@ -239,10 +215,12 @@ with gr.Blocks(theme="gradio/soft") as demo:
     submit_btn = gr.Button(value="Submit")
     charred_inputs = [prompt_input_charr]
     submit_btn.click(
-        fn=
+        fn=generate_image_for_prompt,
+        inputs=charred_inputs,
+        outputs=[charred_output],
     )
     # examples = [["postage stamp from california", "low quality", "charr_output.png", "charr_output.png" ]]
     # gr.Examples(fn = infer_sd, inputs = ["text", "text", "image", "image"], examples=examples, cache_examples=True)
 
 demo.queue(concurrency_count=1)
-demo.launch(debug=True, show_error=True
+demo.launch(debug=True, show_error=True)
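
With fn=generate_image_for_prompt, the click handler now feeds the returned PIL image straight into the gr.Image output. A stripped-down sketch of the same wiring (the component constructors here are assumptions about the surrounding, unchanged code):

import gradio as gr

with gr.Blocks(theme="gradio/soft") as demo:
    prompt_input_charr = gr.Textbox(label="Prompt")
    charred_output = gr.Image(label="Output")
    submit_btn = gr.Button(value="Submit")
    submit_btn.click(
        fn=lambda prompt: None,  # stand-in for generate_image_for_prompt
        inputs=[prompt_input_charr],
        outputs=[charred_output],
    )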