dbmoradi60 committed
Commit 0adb580 · verified · 1 Parent(s): c6237ff

Update app.py

Files changed (1): app.py +19 -5
app.py CHANGED

```diff
@@ -4,6 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import os
 import shutil
+import json
 from huggingface_hub import hf_hub_download
 
 app = FastAPI(title="GPT-OSS-20B API")
@@ -28,11 +29,11 @@ if os.path.exists(cache_dir):
     else:
         os.remove(item_path) if os.path.exists(item_path) else None
 
-# Create cache and model directories (no chmod needed)
+# Create cache and model directories
 os.makedirs(cache_dir, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)
 
-# Download model files explicitly
+# Download model files
 print("Downloading model files...")
 try:
     for file in ["config.json", "dtypes.json", "model.safetensors"]:
@@ -46,11 +47,24 @@ try:
 except Exception as e:
     raise RuntimeError(f"Failed to download model files: {str(e)}")
 
+# Fix config.json if model_type is missing
+config_path = os.path.join(MODEL_DIR, "original/config.json")
+try:
+    with open(config_path, "r") as f:
+        config = json.load(f)
+    if "model_type" not in config or config["model_type"] != "gpt_oss":
+        print("Fixing config.json: setting model_type to 'gpt_oss'")
+        config["model_type"] = "gpt_oss"
+        with open(config_path, "w") as f:
+            json.dump(config, f, indent=2)
+except Exception as e:
+    print(f"Warning: Failed to check or fix config.json: {str(e)}")
+
 # Load tokenizer
 print("Loading tokenizer...")
 try:
     tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub to avoid local config issues
         cache_dir=cache_dir,
         trust_remote_code=True
     )
@@ -61,7 +75,7 @@ except Exception as e:
 print("Loading model (this may take several minutes)...")
 try:
     model = AutoModelForCausalLM.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub
         cache_dir=cache_dir,
         device_map="auto",   # Automatically place on CPU
         torch_dtype="auto",  # Automatic precision
@@ -119,4 +133,4 @@ torch.cuda.empty_cache()
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8005)
+    uvicorn.run(app, host="0.0.0.0", port=8007)
```
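The download loop's body is elided by the diff context. For orientation, here is a minimal sketch of what an `hf_hub_download` loop of this shape typically looks like; `MODEL_ID`, `MODEL_DIR`, and `cache_dir` are defined earlier in app.py and not shown, so the repo id and paths below are illustrative assumptions, not the file's actual values:

```python
from huggingface_hub import hf_hub_download

MODEL_ID = "openai/gpt-oss-20b"  # assumption: the Hub repo id app.py uses
MODEL_DIR = "/tmp/model"         # assumption: local download target
cache_dir = "/tmp/hf_cache"      # assumption: matches the app's cache_dir

for file in ["config.json", "dtypes.json", "model.safetensors"]:
    # Fetch each file from the Hub into MODEL_DIR, reusing the cache
    # across restarts instead of re-downloading the weights every time.
    hf_hub_download(
        repo_id=MODEL_ID,
        filename=file,
        local_dir=MODEL_DIR,
        cache_dir=cache_dir,
    )
```

The config patch in the third hunk matters because transformers dispatches the concrete model class from the `model_type` field of config.json, so a missing or wrong value there breaks `AutoModelForCausalLM.from_pretrained` against the local directory.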
 
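With `from_pretrained` now pointed at `MODEL_ID`, loading resolves against the Hub repo (still reusing `cache_dir`) rather than the locally patched directory, which sidesteps the local config issue for both loaders. A minimal smoke test for the loaded pair, assuming the `tokenizer` and `model` objects from app.py are in scope (the prompt and generation length are illustrative):

```python
import torch

# Tokenize a short prompt, generate a few tokens, and decode the result
# to confirm tokenizer/model dispatch works after the MODEL_ID switch.
inputs = tokenizer("Hello, world.", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```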