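The loader below pulls the FLUX upscaler pipeline and its T5 text encoder in a single pass, applies the usual memory optimizations, and returns the pipeline so a single warm-up invocation leaves everything resident for later requests: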
```python
# model_loader.py
import os

import torch
import spaces
from diffusers import FluxControlNetPipeline
from transformers import T5EncoderModel
from moondream import vl


@spaces.GPU  # run the load on ZeroGPU hardware in a single invocation
def safe_model_load():
    """Load models in a single GPU invocation to keep them warm."""
    try:
        # Set max memory usage for ZeroGPU
        torch.cuda.set_per_process_memory_fraction(1.0)
        torch.set_float32_matmul_precision("high")

        # Load models
        huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
        md_api_key = os.getenv("MD_KEY")

        text_encoder = T5EncoderModel.from_pretrained(
            "LPX55/FLUX.1-merged_uncensored",
            subfolder="text_encoder_2",
            torch_dtype=torch.bfloat16,
            token=huggingface_token,
        )
        pipe = FluxControlNetPipeline.from_pretrained(
            "LPX55/FLUX.1M-8step_upscaler-cnet",
            torch_dtype=torch.bfloat16,
            text_encoder_2=text_encoder,
            token=huggingface_token,
        )

        # Apply memory optimizations; fall back to attention slicing
        # when xformers is not available
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception as e:
            print(f"XFormers not available: {e}")
            pipe.enable_attention_slicing()
        # pipe.enable_sequential_cpu_offload()
        pipe.to("cuda")

        # For memory-sensitive environments
        try:
            torch.multiprocessing.set_sharing_strategy('file_system')
        except Exception as e:
            print(f"Exception raised (torch.multiprocessing): {e}")

        return pipe
    except Exception as e:
        print(f"Model loading failed: {e}")
        # Return placeholder to handle gracefully in UI
        return {"error": str(e)}
```
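For context, here is a minimal sketch of how a loader like this might be wired into a ZeroGPU Gradio app. The `upscale` function, its parameter values, and the interface layout are illustrative assumptions rather than the Space's actual code; only `safe_model_load` and the pipeline come from the snippet above.

```python
# app.py -- hypothetical wiring (upscale, demo, and all settings are assumed)
import gradio as gr
import spaces

from model_loader import safe_model_load

# One warm-up invocation at startup; later requests reuse the loaded pipeline.
pipe = safe_model_load()


@spaces.GPU(duration=120)  # per-request GPU window; 120 s is an assumed value
def upscale(image, prompt):
    # The loader returns {"error": ...} on failure, so surface that in the UI.
    if isinstance(pipe, dict):
        raise gr.Error(f"Model unavailable: {pipe['error']}")
    return pipe(
        prompt=prompt,
        control_image=image,
        controlnet_conditioning_scale=0.6,  # illustrative setting
        num_inference_steps=8,              # matches the 8-step upscaler checkpoint
    ).images[0]


demo = gr.Interface(
    fn=upscale,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Prompt")],
    outputs=gr.Image(),
)
demo.launch()
```

Because the error placeholder from `safe_model_load` is a plain dict, the request handler can detect a failed load and raise a readable `gr.Error` instead of crashing mid-inference.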