loocorez committed (verified)
Commit 2e62b02 · Parent: 19642cc

Upload app.py with huggingface_hub

Files changed (1):
  app.py  +42 -17
app.py CHANGED
@@ -7,9 +7,9 @@ os.environ.setdefault("NANOCHAT_BASE_DIR", "/tmp/nanochat")
 from huggingface_hub import hf_hub_download
 import torch
 import gradio as gr
-
-from nanochat.checkpoint_manager import load_model_from_dir
-from nanochat.engine import Engine
+import json
+import pickle
+from nanochat.gpt import GPT, GPTConfig
 
 # Hardcoded model selection for this Space
 MODEL_REPO = "loocorez/nanochat-mid-d20-step765"
@@ -19,18 +19,38 @@ DEPTH = "20"
 ckpt_dir = f"/tmp/ckpt/d{DEPTH}"
 os.makedirs(ckpt_dir, exist_ok=True)
 
-# tokenizer (where nanochat expects it)
-tokenizer_dir = "/tmp/nanochat/tokenizer"
-os.makedirs(tokenizer_dir, exist_ok=True)
-hf_hub_download(MODEL_REPO, "tokenizer/tokenizer.pkl", local_dir=tokenizer_dir, local_dir_use_symlinks=False)
-
-# mid checkpoint
-hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/model_{STEP}.pt", local_dir=ckpt_dir, local_dir_use_symlinks=False)
-hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/meta_{STEP}.json", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+tok_local = hf_hub_download(MODEL_REPO, "tokenizer/tokenizer.pkl", local_dir="/tmp", local_dir_use_symlinks=False)
+
+model_path = hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/model_{STEP}.pt", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+meta_path = hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/meta_{STEP}.json", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+
+class PklTokenizer:
+    def __init__(self, pkl_path):
+        with open(pkl_path, "rb") as f:
+            self.enc = pickle.load(f)
+        self._bos_id = self.encode_special("<|bos|>")
+    def get_bos_token_id(self):
+        return self._bos_id
+    def encode_special(self, text):
+        return self.enc.encode_single_token(text)
+    def encode(self, text):
+        return self.enc.encode_ordinary(text)
+    def decode(self, ids):
+        return self.enc.decode(ids)
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model, tokenizer, _ = load_model_from_dir(ckpt_dir, device, phase="eval")
-engine = Engine(model, tokenizer)
+with open(meta_path, "r") as f:
+    meta = json.load(f)
+cfg = GPTConfig(**meta["model_config"])
+with torch.device("meta"):
+    model = GPT(cfg)
+model.to_empty(device=device)
+model.init_weights()
+state = torch.load(model_path, map_location=device)
+state = {k.removeprefix("_orig_mod."): v for k, v in state.items()}
+model.load_state_dict(state, strict=True, assign=True)
+model.eval()
+tokenizer = PklTokenizer(tok_local)
 
 def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
     bos = tokenizer.get_bos_token_id()
@@ -47,10 +67,15 @@ def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
     tokens += [assistant_start] + tokenizer.encode(content) + [assistant_end]
     tokens += [assistant_start]
 
-    with torch.amp.autocast(device_type="cuda" if device.type == "cuda" else "cpu", dtype=torch.bfloat16 if device.type == "cuda" else torch.float32):
-        token_column, _ = next(engine.generate(tokens, num_samples=1, max_tokens=max_new_tokens, temperature=temperature, top_k=top_k))
-    new_tokens = token_column[len(tokens):]
-    return tokenizer.decode(new_tokens)
+    generated = []
+    use_cuda = device.type == "cuda"
+    dtype = torch.bfloat16 if use_cuda else torch.float32
+    with torch.amp.autocast(device_type=("cuda" if use_cuda else "cpu"), dtype=dtype):
+        for token in model.generate(tokens, max_tokens=max_new_tokens, temperature=temperature, top_k=top_k):
+            if token == assistant_end or token == bos:
+                break
+            generated.append(token)
+    return tokenizer.decode(generated)
 
 with gr.Blocks() as demo:
     gr.Markdown("# NanoChat MID")
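Note on the loading pattern: the new code constructs GPT under torch.device("meta"), materializes it with to_empty, and loads the checkpoint with assign=True. The point of the sequence is that real storage for the weights is allocated only once, by the loaded tensors themselves, rather than once at construction and again at load time. A minimal, self-contained sketch of the same pattern (Tiny is a hypothetical stand-in for nanochat's GPT):

import torch
import torch.nn as nn

class Tiny(nn.Module):  # hypothetical stand-in for the real model
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 4)

# 1) Construct on the meta device: parameters have shapes but no storage.
with torch.device("meta"):
    model = Tiny()

# 2) Materialize uninitialized tensors on the real target device.
model.to_empty(device="cpu")

# 3) assign=True swaps the checkpoint tensors in instead of copying them
#    into the uninitialized buffers, so each weight is allocated once.
state = Tiny().state_dict()  # stand-in for torch.load(model_path, ...)
model.load_state_dict(state, strict=True, assign=True)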
 
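One detail in the state-dict handling deserves a comment: checkpoints saved from a torch.compile'd model carry a "_orig_mod." prefix on every key, and str.lstrip is the wrong tool for removing it, because lstrip strips a character set rather than a literal prefix and can eat the start of legitimate parameter names. str.removeprefix (Python 3.9+) removes exactly the prefix, which is what the load path above relies on. A small demonstration with made-up key names:

state = {"_orig_mod.wte.weight": 0, "motif.bias": 1}  # hypothetical keys

bad = {k.lstrip("_orig_mod."): v for k, v in state.items()}
good = {k.removeprefix("_orig_mod."): v for k, v in state.items()}

print(bad)   # {'wte.weight': 0, 'tif.bias': 1}  <- 'mo' wrongly stripped
print(good)  # {'wte.weight': 0, 'motif.bias': 1}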
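PklTokenizer assumes the pickled object behaves like a tiktoken Encoding, i.e. exposes encode_ordinary, encode_single_token, and decode. A quick way to sanity-check that contract, using the stock gpt2 encoding as a stand-in for this Space's tokenizer.pkl (nanochat's actual special tokens, such as <|bos|>, are specific to its own tokenizer):

import tiktoken

enc = tiktoken.get_encoding("gpt2")  # stand-in for pickle.load on tokenizer.pkl
ids = enc.encode_ordinary("hello world")        # plain text, no special tokens
bos = enc.encode_single_token("<|endoftext|>")  # one special token -> one id
assert enc.decode(ids) == "hello world"
print(ids, bos)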