Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload app.py with huggingface_hub
Browse files
    	
        app.py
    CHANGED
    
    | @@ -7,9 +7,9 @@ os.environ.setdefault("NANOCHAT_BASE_DIR", "/tmp/nanochat") | |
| 7 | 
             
            from huggingface_hub import hf_hub_download
         | 
| 8 | 
             
            import torch
         | 
| 9 | 
             
            import gradio as gr
         | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
            from nanochat. | 
| 13 |  | 
| 14 | 
             
            # Hardcoded model selection for this Space
         | 
| 15 | 
             
            MODEL_REPO = "loocorez/nanochat-mid-d20-step765"
         | 
| @@ -19,18 +19,38 @@ DEPTH = "20" | |
| 19 | 
             
            ckpt_dir = f"/tmp/ckpt/d{DEPTH}"
         | 
| 20 | 
             
            os.makedirs(ckpt_dir, exist_ok=True)
         | 
| 21 |  | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
            hf_hub_download(MODEL_REPO, " | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 30 |  | 
| 31 | 
             
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 34 |  | 
| 35 | 
             
            def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
         | 
| 36 | 
             
                bos = tokenizer.get_bos_token_id()
         | 
| @@ -47,10 +67,15 @@ def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256): | |
| 47 | 
             
                        tokens += [assistant_start] + tokenizer.encode(content) + [assistant_end]
         | 
| 48 | 
             
                tokens += [assistant_start]
         | 
| 49 |  | 
| 50 | 
            -
                 | 
| 51 | 
            -
             | 
| 52 | 
            -
                 | 
| 53 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 54 |  | 
| 55 | 
             
            with gr.Blocks() as demo:
         | 
| 56 | 
             
                gr.Markdown("# NanoChat MID")
         | 
|  | |
| 7 | 
             
            from huggingface_hub import hf_hub_download
         | 
| 8 | 
             
            import torch
         | 
| 9 | 
             
            import gradio as gr
         | 
| 10 | 
            +
            import json
         | 
| 11 | 
            +
            import pickle
         | 
| 12 | 
            +
            from nanochat.gpt import GPT, GPTConfig
         | 
| 13 |  | 
| 14 | 
             
            # Hardcoded model selection for this Space
         | 
| 15 | 
             
            MODEL_REPO = "loocorez/nanochat-mid-d20-step765"
         | 
|  | |
| 19 | 
             
            # Directory that receives the checkpoint files for the configured depth.
            ckpt_dir = f"/tmp/ckpt/d{DEPTH}"
            os.makedirs(ckpt_dir, exist_ok=True)

            # Fetch the tokenizer pickle, the model weights and the checkpoint
            # metadata from the model repository.
            tok_local = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="tokenizer/tokenizer.pkl",
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )

            model_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename=f"mid_checkpoints/d{DEPTH}/model_{STEP}.pt",
                local_dir=ckpt_dir,
                local_dir_use_symlinks=False,
            )
            meta_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename=f"mid_checkpoints/d{DEPTH}/meta_{STEP}.json",
                local_dir=ckpt_dir,
                local_dir_use_symlinks=False,
            )
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            class PklTokenizer:
                """Thin adapter around an encoder object stored in a pickle file.

                The unpickled object is kept as ``self.enc`` and must provide
                ``encode_single_token``, ``encode_ordinary`` and ``decode``
                (presumably a tiktoken ``Encoding`` -- confirm against whatever
                produced the pickle).
                """

                def __init__(self, pkl_path: str) -> None:
                    """Load the encoder from *pkl_path* and cache the ``<|bos|>`` id.

                    SECURITY: ``pickle.load`` can execute arbitrary code while
                    deserialising. Only point this at files from a repository
                    you trust.
                    """
                    with open(pkl_path, "rb") as f:
                        self.enc = pickle.load(f)
                    # Resolved once up front so get_bos_token_id() is a plain lookup.
                    self._bos_id = self.encode_special("<|bos|>")

                def get_bos_token_id(self):
                    """Return the id of the ``<|bos|>`` token cached at construction."""
                    return self._bos_id

                def encode_special(self, text):
                    """Return the encoder's single-token id for the special token *text*."""
                    return self.enc.encode_single_token(text)

                def encode(self, text):
                    """Encode *text* with the encoder's ``encode_ordinary`` method."""
                    return self.enc.encode_ordinary(text)

                def decode(self, ids):
                    """Decode the token ids *ids* with the encoder's ``decode`` method."""
                    return self.enc.decode(ids)
         | 
| 40 |  | 
| 41 | 
             
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            # Build the model configuration from the checkpoint's metadata file.
            with open(meta_path, "r", encoding="utf-8") as f:
                meta = json.load(f)
            cfg = GPTConfig(**meta["model_config"])

            # Construct the module on the meta device, then give it uninitialised
            # storage on the target device before weights are filled in.
            with torch.device("meta"):
                model = GPT(cfg)
            model.to_empty(device=device)
            model.init_weights()

            # NOTE(review): torch.load unpickles the file; if the checkpoint is a
            # plain tensor state dict, passing weights_only=True would be safer --
            # confirm before changing.
            state = torch.load(model_path, map_location=device)

            # Remove the "_orig_mod." prefix (the one torch.compile puts on parameter
            # names) from the front of each key. str.lstrip() must not be used here:
            # it strips any leading characters drawn from the given set, so a key
            # such as "resid_lambdas" would lose its leading "r".
            _COMPILE_PREFIX = "_orig_mod."
            state = {
                (k[len(_COMPILE_PREFIX):] if k.startswith(_COMPILE_PREFIX) else k): v
                for k, v in state.items()
            }
            model.load_state_dict(state, strict=True, assign=True)
            model.eval()

            tokenizer = PklTokenizer(tok_local)
         | 
| 54 |  | 
| 55 | 
             
            def chat_fn(history, temperature=0.8, top_k=50, max_new_tokens=256):
         | 
| 56 | 
             
                bos = tokenizer.get_bos_token_id()
         | 
|  | |
| 67 | 
             
                        tokens += [assistant_start] + tokenizer.encode(content) + [assistant_end]
         | 
| 68 | 
             
                tokens += [assistant_start]
         | 
| 69 |  | 
| 70 | 
            +
                generated = []
         | 
| 71 | 
            +
                use_cuda = device.type == "cuda"
         | 
| 72 | 
            +
                dtype = torch.bfloat16 if use_cuda else torch.float32
         | 
| 73 | 
            +
                with torch.amp.autocast(device_type=("cuda" if use_cuda else "cpu"), dtype=dtype):
         | 
| 74 | 
            +
                    for token in model.generate(tokens, max_tokens=max_new_tokens, temperature=temperature, top_k=top_k):
         | 
| 75 | 
            +
                        if token == assistant_end or token == bos:
         | 
| 76 | 
            +
                            break
         | 
| 77 | 
            +
                        generated.append(token)
         | 
| 78 | 
            +
                return tokenizer.decode(generated)
         | 
| 79 |  | 
| 80 | 
             
            with gr.Blocks() as demo:
         | 
| 81 | 
             
                gr.Markdown("# NanoChat MID")
         |