Update app.py
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import os
 import shutil
+import json
 from huggingface_hub import hf_hub_download
 
 app = FastAPI(title="GPT-OSS-20B API")
@@ -28,11 +29,11 @@ if os.path.exists(cache_dir):
         else:
             os.remove(item_path) if os.path.exists(item_path) else None
 
-# Create cache and model directories
+# Create cache and model directories
 os.makedirs(cache_dir, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)
 
-# Download model files
+# Download model files
 print("Downloading model files...")
 try:
     for file in ["config.json", "dtypes.json", "model.safetensors"]:
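The body of this download loop sits between hunks and is not shown in the diff. For orientation, here is a minimal sketch of a per-file hf_hub_download call; the MODEL_ID and cache_dir values below are placeholders, not the Space's actual definitions:

from huggingface_hub import hf_hub_download

MODEL_ID = "openai/gpt-oss-20b"   # assumed value; app.py defines MODEL_ID near the top
cache_dir = "cache"               # placeholder; app.py defines its own cache_dir

for file in ["config.json", "dtypes.json", "model.safetensors"]:
    # repo_id, filename, and cache_dir are standard hf_hub_download parameters
    local_path = hf_hub_download(repo_id=MODEL_ID, filename=file, cache_dir=cache_dir)
    print(f"Downloaded {file} -> {local_path}")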
@@ -46,11 +47,24 @@ try:
 except Exception as e:
     raise RuntimeError(f"Failed to download model files: {str(e)}")
 
+# Fix config.json if model_type is missing
+config_path = os.path.join(MODEL_DIR, "original/config.json")
+try:
+    with open(config_path, "r") as f:
+        config = json.load(f)
+    if "model_type" not in config or config["model_type"] != "gpt_oss":
+        print("Fixing config.json: setting model_type to 'gpt_oss'")
+        config["model_type"] = "gpt_oss"
+        with open(config_path, "w") as f:
+            json.dump(config, f, indent=2)
+except Exception as e:
+    print(f"Warning: Failed to check or fix config.json: {str(e)}")
+
 # Load tokenizer
 print("Loading tokenizer...")
 try:
     tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub to avoid local config issues
         cache_dir=cache_dir,
         trust_remote_code=True
     )
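Why the patch above matters: transformers resolves the architecture class from the "model_type" field in config.json, so a missing or mismatched value makes the Auto* loaders fail. A minimal sanity check, assuming the same MODEL_ID and cache_dir names as in app.py and a transformers release recent enough to know gpt_oss:

from transformers import AutoConfig

# Resolve the config and confirm it maps to the expected architecture
# before spending minutes loading 20B parameters' worth of weights.
cfg = AutoConfig.from_pretrained(MODEL_ID, cache_dir=cache_dir, trust_remote_code=True)
assert cfg.model_type == "gpt_oss", cfg.model_type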
@@ -61,7 +75,7 @@ except Exception as e:
 print("Loading model (this may take several minutes)...")
 try:
     model = AutoModelForCausalLM.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub
         cache_dir=cache_dir,
         device_map="auto",   # Automatically place on CPU
         torch_dtype="auto",  # Automatic precision
@@ -119,4 +133,4 @@ torch.cuda.empty_cache()
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=
+    uvicorn.run(app, host="0.0.0.0", port=8007)
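With this commit the server binds to port 8007. A hedged usage sketch: the route and payload below are placeholders, since the endpoint definitions are outside this diff.

import requests

# Hypothetical endpoint name and JSON schema; check app.py for the real routes.
resp = requests.post(
    "http://localhost:8007/generate",
    json={"prompt": "Hello, world", "max_new_tokens": 64},
)
print(resp.status_code, resp.json())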