Spaces: Running on Zero
Update app.py

app.py CHANGED
@@ -13,7 +13,7 @@ from diffusers.utils import load_image
 from PIL import Image
 import requests
 import transformers
-from transformers import AutoTokenizer, T5EncoderModel
+from transformers import AutoTokenizer, T5EncoderModel, BitsAndBytesConfig
 from translatepy import Translator
 
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -53,10 +53,12 @@ transformer = SD3Transformer2DModel.from_pretrained(
     torch_dtype=torch.float16,
 )
 
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
 text_encoder_3 = T5EncoderModel.from_pretrained(
     repo,
     subfolder="text_encoder_3",
-
+    quantization_config=quantization_config,
 )
 
 tokenizer_3 = AutoTokenizer.from_pretrained(
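The substance of this hunk is memory: `text_encoder_3` in SD3 is the T5-XXL encoder, by far the largest component of the checkpoint, and `load_in_8bit=True` loads its weights as int8, roughly halving the fp16 footprint. A minimal standalone sketch of the pattern follows; the repo id is an assumption (the diff never shows how `repo` is defined), and bitsandbytes must be installed for `load_in_8bit` to work:

# Minimal sketch of the 8-bit pattern in this hunk. The repo id below is
# an assumption, not taken from the diff.
from transformers import AutoTokenizer, BitsAndBytesConfig, T5EncoderModel

repo = "stabilityai/stable-diffusion-3-medium-diffusers"  # assumed repo id

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

text_encoder_3 = T5EncoderModel.from_pretrained(
    repo,
    subfolder="text_encoder_3",
    quantization_config=quantization_config,  # weights load as int8
)
tokenizer_3 = AutoTokenizer.from_pretrained(repo, subfolder="tokenizer_3")

Note that bitsandbytes places the 8-bit weights on the GPU at load time and such models cannot be re-cast with `.to(dtype)` afterwards, which is one reason the encoder is built separately from the pipelines below.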
@@ -65,23 +67,44 @@ tokenizer_3 = AutoTokenizer.from_pretrained(
     torch_dtype=torch.float16,
 )
 
+torch.set_float32_matmul_precision("high")
+
+torch._inductor.config.conv_1x1_as_mm = True
+torch._inductor.config.coordinate_descent_tuning = True
+torch._inductor.config.epilogue_fusion = False
+torch._inductor.config.coordinate_descent_check_all_directions = True
+
 # Ensure model and scheduler are initialized in GPU-enabled function
 if torch.cuda.is_available():
     pipe = StableDiffusion3Pipeline.from_pretrained(
         repo,
-        vae=vae,
-        transformer=transformer,
         tokenizer_3=tokenizer_3,
         text_encoder_3=text_encoder_3,
         torch_dtype=torch.float16).to("cuda")
     pipe2 = StableDiffusion3Img2ImgPipeline.from_pretrained(
         repo,
-        vae=vae,
-        transformer=transformer,
         tokenizer_3=tokenizer_3,
         text_encoder_3=text_encoder_3,
         torch_dtype=torch.float16).to("cuda")
 
+    pipe.set_progress_bar_config(disable=True)
+
+    pipe.transformer.to(memory_format=torch.channels_last)
+    pipe.vae.to(memory_format=torch.channels_last)
+
+    pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
+    pipe.vae.decode = torch.compile(pipe.vae.decode, mode="max-autotune", fullgraph=True)
+
+
+    pipe2.set_progress_bar_config(disable=True)
+
+    pipe2.transformer.to(memory_format=torch.channels_last)
+    pipe2.vae.to(memory_format=torch.channels_last)
+
+    pipe2.transformer = torch.compile(pipe2.transformer, mode="max-autotune", fullgraph=True)
+    pipe2.vae.decode = torch.compile(pipe2.vae.decode, mode="max-autotune", fullgraph=True)
+
+
 pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config)
 pipe2.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe2.scheduler.config)
 
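Two things happen in this hunk. First, the explicit `vae=vae` and `transformer=transformer` arguments are dropped, so both pipelines now load the repo's own fp16 VAE and transformer rather than the externally constructed modules. Second, both pipelines get the usual inductor speed-up recipe: channels-last memory format plus `torch.compile(..., mode="max-autotune", fullgraph=True)` on the transformer and the VAE decoder. Compilation is lazy, so the first generation after startup pays the one-time autotune cost; a sketch of the common warm-up pattern follows, where the prompts and step counts are arbitrary illustrations, not taken from app.py:

# Sketch: warm up the compiled pipeline once so real requests are fast.
# `pipe` is the compiled StableDiffusion3Pipeline from the diff; prompts
# and step counts here are illustrative assumptions.

# First call triggers inductor autotuning/compilation (slow, one-time).
pipe("warm-up", num_inference_steps=4)

# Later calls with the same shapes reuse the compiled graphs.
image = pipe("a photo of a corgi", num_inference_steps=28).images[0]

Since `fullgraph=True` is used without dynamic shapes, changing the output resolution can trigger a fresh compile, so warming up at the resolution the app actually serves is what makes this pay off.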
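A note on the "Running on Zero" badge and the "GPU-enabled function" comment above: on ZeroGPU Spaces a GPU is attached only while a function decorated with `@spaces.GPU` is running. The handler itself is outside this diff, so the sketch below is purely illustrative of the pattern, with an assumed name and signature:

# Illustrative ZeroGPU handler; the real function lives elsewhere in
# app.py and is not part of this diff.
import spaces

@spaces.GPU
def generate(prompt: str):
    # A GPU is attached for the duration of this call on ZeroGPU hardware.
    return pipe(prompt).images[0]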