LPX committed

Commit f725d52 · Parent(s): b971b27
Remove sequential CPU offload from model loading and simplify return value in safe_model_load function
Files changed (1): model_loader.py (+2 -7)
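The commit drops sequential CPU offload and keeps the explicit move of the pipeline onto the GPU. In diffusers these are alternative memory strategies rather than complementary ones: offload installs accelerate hooks that manage device placement on their own, so it is not combined with a manual .to("cuda"). A sketch of the two options follows; the pipeline class and checkpoint are assumptions for illustration, not taken from this repo.

    from diffusers import StableDiffusionPipeline

    # Illustrative only: the actual pipeline class and checkpoint used by
    # model_loader.py are not shown in this commit.
    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

    # Strategy kept by this commit: hold the whole pipeline in GPU memory.
    pipe.to("cuda")

    # Strategy removed by this commit: sequential CPU offload. It streams
    # submodules to the GPU on demand, trading speed for a much smaller
    # VRAM footprint, and it handles device placement itself instead of
    # relying on a manual .to("cuda").
    # pipe.enable_sequential_cpu_offload()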
    	
model_loader.py CHANGED
    
@@ -39,7 +39,7 @@ def safe_model_load():
             print(f"XFormers not available: {e}")
 
         pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
+        # pipe.enable_sequential_cpu_offload()
         pipe.to("cuda")
 
         # For memory-sensitive environments
@@ -47,13 +47,8 @@ def safe_model_load():
             torch.multiprocessing.set_sharing_strategy('file_system')
         except Exception as e:
             print(f"Exception raised (torch.multiprocessing): {e}")
-        # Moondream
-        model = vl(api_key=md_api_key)
 
-        return {
-            "pipeline": pipe,
-            "captioner": model
-        }
+        return pipe
 
     except Exception as e:
         print(f"Model loading failed: {e}")
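Since safe_model_load() now returns the pipeline directly instead of a dict with "pipeline" and "captioner" keys, call sites that unpacked the dict need a matching update. A minimal sketch of the caller-side change; the surrounding app code is assumed, not part of this commit.

    # Before this commit, callers unpacked a dict:
    #   models = safe_model_load()
    #   pipe = models["pipeline"]
    #   captioner = models["captioner"]

    # After this commit, the pipeline comes back directly. On failure the
    # function prints the error and falls through, so the result may be None.
    pipe = safe_model_load()
    if pipe is None:
        raise RuntimeError("Model loading failed")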
