dbmoradi60 committed
Commit 0adb580 · verified · 1 Parent(s): c6237ff

Update app.py

Files changed (1): app.py +19 -5
app.py CHANGED

```diff
@@ -4,6 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import os
 import shutil
+import json
 from huggingface_hub import hf_hub_download
 
 app = FastAPI(title="GPT-OSS-20B API")
@@ -28,11 +29,11 @@ if os.path.exists(cache_dir):
     else:
         os.remove(item_path) if os.path.exists(item_path) else None
 
-# Create cache and model directories (no chmod needed)
+# Create cache and model directories
 os.makedirs(cache_dir, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)
 
-# Download model files explicitly
+# Download model files
 print("Downloading model files...")
 try:
     for file in ["config.json", "dtypes.json", "model.safetensors"]:
@@ -46,11 +47,24 @@ try:
 except Exception as e:
     raise RuntimeError(f"Failed to download model files: {str(e)}")
 
+# Fix config.json if model_type is missing
+config_path = os.path.join(MODEL_DIR, "original/config.json")
+try:
+    with open(config_path, "r") as f:
+        config = json.load(f)
+    if "model_type" not in config or config["model_type"] != "gpt_oss":
+        print("Fixing config.json: setting model_type to 'gpt_oss'")
+        config["model_type"] = "gpt_oss"
+        with open(config_path, "w") as f:
+            json.dump(config, f, indent=2)
+except Exception as e:
+    print(f"Warning: Failed to check or fix config.json: {str(e)}")
+
 # Load tokenizer
 print("Loading tokenizer...")
 try:
     tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub to avoid local config issues
         cache_dir=cache_dir,
         trust_remote_code=True
     )
@@ -61,7 +75,7 @@ except Exception as e:
 print("Loading model (this may take several minutes)...")
 try:
     model = AutoModelForCausalLM.from_pretrained(
-        MODEL_DIR,
+        MODEL_ID,  # Load directly from Hub
         cache_dir=cache_dir,
         device_map="auto",   # Automatically place on CPU
         torch_dtype="auto",  # Automatic precision
@@ -119,4 +133,4 @@ torch.cuda.empty_cache()
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8005)
+    uvicorn.run(app, host="0.0.0.0", port=8007)
```
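The download loop's body is elided by the diff context. For orientation, here is a minimal sketch of what an `hf_hub_download` loop of this shape typically looks like; `MODEL_ID`, `MODEL_DIR`, and `cache_dir` are defined earlier in app.py and not shown, so the repo id and paths below are illustrative assumptions, not the file's actual values:

```python
from huggingface_hub import hf_hub_download

MODEL_ID = "openai/gpt-oss-20b"  # assumption: the Hub repo id app.py uses
MODEL_DIR = "/tmp/model"         # assumption: local download target
cache_dir = "/tmp/hf_cache"      # assumption: matches the app's cache_dir

for file in ["config.json", "dtypes.json", "model.safetensors"]:
    # Fetch each file from the Hub into MODEL_DIR, reusing the cache
    # across restarts instead of re-downloading the weights every time.
    hf_hub_download(
        repo_id=MODEL_ID,
        filename=file,
        local_dir=MODEL_DIR,
        cache_dir=cache_dir,
    )
```

The config patch in the third hunk matters because transformers dispatches the concrete model class from the `model_type` field of config.json, so a missing or wrong value there breaks `AutoModelForCausalLM.from_pretrained` against the local directory.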
 
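With `from_pretrained` now pointed at `MODEL_ID`, loading resolves against the Hub repo (still reusing `cache_dir`) rather than the locally patched directory, which sidesteps the local config issue for both loaders. A minimal smoke test for the loaded pair, assuming the `tokenizer` and `model` objects from app.py are in scope (the prompt and generation length are illustrative):

```python
import torch

# Tokenize a short prompt, generate a few tokens, and decode the result
# to confirm tokenizer/model dispatch works after the MODEL_ID switch.
inputs = tokenizer("Hello, world.", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```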