add access_token to model loader
app.py CHANGED
@@ -12,6 +12,8 @@ from transformers import AutoTokenizer
 from ddgs import DDGS
 import spaces  # Import spaces early to enable ZeroGPU support
 
+access_token=os.environ['HF_TOKEN']
+
 # Optional: Disable GPU visibility if you wish to force CPU usage
 # os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
@@ -96,7 +98,8 @@ def load_pipeline(model_name):
     if model_name in PIPELINES:
         return PIPELINES[model_name]
     repo = MODELS[model_name]["repo_id"]
-    tokenizer = AutoTokenizer.from_pretrained(repo)
+    tokenizer = AutoTokenizer.from_pretrained(repo,
+                                              token=access_token)
     for dtype in (torch.bfloat16, torch.float16, torch.float32):
         try:
             pipe = pipeline(
@@ -105,8 +108,8 @@ def load_pipeline(model_name):
                 tokenizer=tokenizer,
                 trust_remote_code=True,
                 torch_dtype=dtype,
-                device_map="auto"
-            )
+                device_map="auto",
+                token=access_token)
             PIPELINES[model_name] = pipe
             return pipe
         except Exception:
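One caveat worth noting: os.environ['HF_TOKEN'] runs at import time and raises a KeyError if the HF_TOKEN secret is not configured, which takes the Space down before the UI even starts. A minimal, more forgiving sketch (same secret name assumed):

import os

# Fall back to anonymous access when HF_TOKEN is unset; public repos
# still load, and gated repos fail at download time instead.
access_token = os.environ.get("HF_TOKEN")  # None if the secret is missing

Both AutoTokenizer.from_pretrained and pipeline accept token=None, in which case they behave as if no token had been passed.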
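For context, this is a self-contained sketch of load_pipeline as it stands after the commit. The MODELS entry, the "text-generation" task, and the fallback raise at the end are hypothetical fill-ins; the diff only shows the lines around the two token changes.

import os
import torch
from transformers import AutoTokenizer, pipeline

access_token = os.environ['HF_TOKEN']

# Assumed shapes, reconstructed from the diff: a registry of model repos
# and a cache of already-built pipelines.
MODELS = {"example": {"repo_id": "org/model"}}  # hypothetical entry
PIPELINES = {}

def load_pipeline(model_name):
    if model_name in PIPELINES:
        return PIPELINES[model_name]
    repo = MODELS[model_name]["repo_id"]
    tokenizer = AutoTokenizer.from_pretrained(repo,
                                              token=access_token)
    # Try progressively wider dtypes; the first one that loads wins.
    for dtype in (torch.bfloat16, torch.float16, torch.float32):
        try:
            pipe = pipeline(
                "text-generation",  # assumed task, not visible in the diff
                model=repo,
                tokenizer=tokenizer,
                trust_remote_code=True,
                torch_dtype=dtype,
                device_map="auto",
                token=access_token)
            PIPELINES[model_name] = pipe
            return pipe
        except Exception:
            continue  # assumed: fall through to the next dtype
    raise RuntimeError(f"Could not load {model_name} with any dtype")

Passing token= to both the tokenizer and the pipeline covers both downloads; an alternative would be a single huggingface_hub.login(token=access_token) at startup, which authenticates every subsequent Hub call.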