loocorez committed (verified)
Commit 2e62b02 · Parent: 19642cc

Upload app.py with huggingface_hub

Files changed (1):
  app.py  +42 -17
app.py CHANGED
@@ -7,9 +7,9 @@ os.environ.setdefault("NANOCHAT_BASE_DIR", "/tmp/nanochat")
 from huggingface_hub import hf_hub_download
 import torch
 import gradio as gr
-
-from nanochat.checkpoint_manager import load_model_from_dir
-from nanochat.engine import Engine
+import json
+import pickle
+from nanochat.gpt import GPT, GPTConfig
 
 # Hardcoded model selection for this Space
 MODEL_REPO = "loocorez/nanochat-mid-d20-step765"
@@ -19,18 +19,38 @@ DEPTH = "20"
 ckpt_dir = f"/tmp/ckpt/d{DEPTH}"
 os.makedirs(ckpt_dir, exist_ok=True)
 
-# tokenizer (where nanochat expects it)
-tokenizer_dir = "/tmp/nanochat/tokenizer"
-os.makedirs(tokenizer_dir, exist_ok=True)
-hf_hub_download(MODEL_REPO, "tokenizer/tokenizer.pkl", local_dir=tokenizer_dir, local_dir_use_symlinks=False)
-
-# mid checkpoint
-hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/model_{STEP}.pt", local_dir=ckpt_dir, local_dir_use_symlinks=False)
-hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/meta_{STEP}.json", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+tok_local = hf_hub_download(MODEL_REPO, "tokenizer/tokenizer.pkl", local_dir="/tmp", local_dir_use_symlinks=False)
+
+model_path = hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/model_{STEP}.pt", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+meta_path = hf_hub_download(MODEL_REPO, f"mid_checkpoints/d{DEPTH}/meta_{STEP}.json", local_dir=ckpt_dir, local_dir_use_symlinks=False)
+
+class PklTokenizer:
+    def __init__(self, pkl_path):
+        with open(pkl_path, "rb") as f:
+            self.enc = pickle.load(f)
+        self._bos_id = self.encode_special("<|bos|>")
+    def get_bos_token_id(self):
+        return self._bos_id
+    def encode_special(self, text):
+        return self.enc.encode_single_token(text)
+    def encode(self, text):
+        return self.enc.encode_ordinary(text)
+    def decode(self, ids):
+        return self.enc.decode(ids)
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model, tokenizer, _ = load_model_from_dir(ckpt_dir, device, phase="eval")
-engine = Engine(model, tokenizer)
+with open(meta_path, "r") as f:
+    meta = json.load(f)
+cfg = GPTConfig(**meta["model_config"])
+with torch.device("meta"):
+    model = GPT(cfg)
+model.to_empty(device=device)
+model.init_weights()
+state = torch.load(model_path, map_location=device)
+state = {k.removeprefix("_orig_mod."): v for k, v in state.items()}
+model.load_state_dict(state, strict=True, assign=True)
+model.eval()
+tokenizer = PklTokenizer(tok_local)
 
 def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
     bos = tokenizer.get_bos_token_id()
@@ -47,10 +67,15 @@ def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
     tokens += [assistant_start] + tokenizer.encode(content) + [assistant_end]
     tokens += [assistant_start]
 
-    with torch.amp.autocast(device_type="cuda" if device.type == "cuda" else "cpu", dtype=torch.bfloat16 if device.type == "cuda" else torch.float32):
-        token_column, _ = next(engine.generate(tokens, num_samples=1, max_tokens=max_new_tokens, temperature=temperature, top_k=top_k))
-    new_tokens = token_column[len(tokens):]
-    return tokenizer.decode(new_tokens)
+    generated = []
+    use_cuda = device.type == "cuda"
+    dtype = torch.bfloat16 if use_cuda else torch.float32
+    with torch.amp.autocast(device_type=("cuda" if use_cuda else "cpu"), dtype=dtype):
+        for token in model.generate(tokens, max_tokens=max_new_tokens, temperature=temperature, top_k=top_k):
+            if token == assistant_end or token == bos:
+                break
+            generated.append(token)
+    return tokenizer.decode(generated)
 
 with gr.Blocks() as demo:
     gr.Markdown("# NanoChat MID")
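Note on the loading pattern: the new code constructs GPT under torch.device("meta"), materializes it with to_empty, and loads the checkpoint with assign=True. The point of the sequence is that real storage for the weights is allocated only once, by the loaded tensors themselves, rather than once at construction and again at load time. A minimal, self-contained sketch of the same pattern (Tiny is a hypothetical stand-in for nanochat's GPT):

import torch
import torch.nn as nn

class Tiny(nn.Module):  # hypothetical stand-in for the real model
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 4)

# 1) Construct on the meta device: parameters have shapes but no storage.
with torch.device("meta"):
    model = Tiny()

# 2) Materialize uninitialized tensors on the real target device.
model.to_empty(device="cpu")

# 3) assign=True swaps the checkpoint tensors in instead of copying them
#    into the uninitialized buffers, so each weight is allocated once.
state = Tiny().state_dict()  # stand-in for torch.load(model_path, ...)
model.load_state_dict(state, strict=True, assign=True)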
 
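One detail in the state-dict handling deserves a comment: checkpoints saved from a torch.compile'd model carry a "_orig_mod." prefix on every key, and str.lstrip is the wrong tool for removing it, because lstrip strips a character set rather than a literal prefix and can eat the start of legitimate parameter names. str.removeprefix (Python 3.9+) removes exactly the prefix, which is what the load path above relies on. A small demonstration with made-up key names:

state = {"_orig_mod.wte.weight": 0, "motif.bias": 1}  # hypothetical keys

bad = {k.lstrip("_orig_mod."): v for k, v in state.items()}
good = {k.removeprefix("_orig_mod."): v for k, v in state.items()}

print(bad)   # {'wte.weight': 0, 'tif.bias': 1}  <- 'mo' wrongly stripped
print(good)  # {'wte.weight': 0, 'motif.bias': 1}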
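PklTokenizer assumes the pickled object behaves like a tiktoken Encoding, i.e. exposes encode_ordinary, encode_single_token, and decode. A quick way to sanity-check that contract, using the stock gpt2 encoding as a stand-in for this Space's tokenizer.pkl (nanochat's actual special tokens, such as <|bos|>, are specific to its own tokenizer):

import tiktoken

enc = tiktoken.get_encoding("gpt2")  # stand-in for pickle.load on tokenizer.pkl
ids = enc.encode_ordinary("hello world")        # plain text, no special tokens
bos = enc.encode_single_token("<|endoftext|>")  # one special token -> one id
assert enc.decode(ids) == "hello world"
print(ids, bos)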