Merge pull request #120 from OpenAccess-AI-Collective/model-from-path
Split up llama model loading so the config can be loaded from the base config (base_model_config) and the model weights can be loaded from a path.
- README.md +3 -0
- scripts/finetune.py +3 -2
- src/axolotl/utils/models.py +12 -6
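The effect of the change is that the Hugging Face config is resolved from base_model_config while the weights themselves are read from base_model, which can be a bare local path that ships no configuration .json files. Below is a minimal sketch of that pattern using the standard transformers API; the ./checkpoints/llama-run path is a hypothetical stand-in, and ./llama-7b-hf reuses the README's example value.

```python
# Minimal sketch of the config/weights split introduced by this PR.
from transformers import LlamaConfig, LlamaForCausalLM

# The architecture config comes from the base config location ...
config = LlamaConfig.from_pretrained("./llama-7b-hf")

# ... while the weights can live at a different path with no config .json files.
model = LlamaForCausalLM.from_pretrained(
    "./checkpoints/llama-run",  # hypothetical local weights directory
    config=config,
)
```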
README.md
CHANGED

@@ -171,6 +171,9 @@ base_model_ignore_patterns:
 # if the base_model repo on hf hub doesn't include configuration .json files,
 # you can set that here, or leave this empty to default to base_model
 base_model_config: ./llama-7b-hf
+# Optional tokenizer configuration override in case you want to use a different tokenizer
+# than the one defined in the base model
+tokenizer_config:
 # If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too
 model_type: AutoModelForCausalLM
 # Corresponding tokenizer for the model AutoTokenizer is a good choice
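A hedged example of how the new tokenizer_config key might sit alongside the existing options: the base_model path below is a hypothetical stand-in, ./llama-7b-hf reuses the README's example value, and per scripts/finetune.py an empty tokenizer_config falls back to base_model_config.

```yaml
base_model: ./llama-7b-weights-only   # hypothetical local dir without config .json files
base_model_config: ./llama-7b-hf      # where the configuration .json files live
tokenizer_config: ./llama-7b-hf       # optional; empty value falls back to base_model_config
model_type: AutoModelForCausalLM
```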
scripts/finetune.py
CHANGED

@@ -173,8 +173,9 @@ def train(
     cfg.bf16 = False
 
     # load the tokenizer first
-
-
+    tokenizer_config = cfg.tokenizer_config or cfg.base_model_config
+    logging.info(f"loading tokenizer... {tokenizer_config}")
+    tokenizer = load_tokenizer(tokenizer_config, cfg.tokenizer_type, cfg)
 
     if check_not_in(
         ["inference", "shard", "merge_lora"], kwargs
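The `or` fallback means an unset or empty tokenizer_config resolves to base_model_config before load_tokenizer is called. A rough illustration of just that fallback, with cfg modeled as a plain namespace purely for brevity (an assumption; axolotl's real cfg is a DictDefault):

```python
from types import SimpleNamespace

# cfg stand-in: tokenizer_config unset, so the base model config location wins.
cfg = SimpleNamespace(tokenizer_config=None, base_model_config="./llama-7b-hf")
print(cfg.tokenizer_config or cfg.base_model_config)  # ./llama-7b-hf

# With an explicit override, the override wins.
cfg.tokenizer_config = "./my-custom-tokenizer"        # hypothetical path
print(cfg.tokenizer_config or cfg.base_model_config)  # ./my-custom-tokenizer
```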
src/axolotl/utils/models.py
CHANGED

@@ -10,9 +10,14 @@ from typing import TYPE_CHECKING, Optional, Tuple  # noqa: F401
 import bitsandbytes as bnb
 import torch
 import transformers
-from transformers import AutoModelForCausalLM  # noqa: F401
 from transformers import PreTrainedModel  # noqa: F401
-from transformers import
+from transformers import (  # noqa: F401
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    LlamaConfig,
+)
 
 try:
     from transformers import LlamaForCausalLM

@@ -25,24 +30,23 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
 
 if TYPE_CHECKING:
     from peft import PeftConfig  # noqa: F401
-    from transformers import PreTrainedTokenizer  # noqa: F401
 
 from axolotl.utils.dict import DictDefault  # noqa: F401
 
 
 def load_tokenizer(
-
+    tokenizer_config,
     tokenizer_type,
     cfg,
 ):
     if tokenizer_type:
         tokenizer = getattr(transformers, tokenizer_type).from_pretrained(
-
+            tokenizer_config,
             trust_remote_code=cfg.trust_remote_code or False,
         )
     else:
         tokenizer = AutoTokenizer.from_pretrained(
-
+            tokenizer_config,
             trust_remote_code=cfg.trust_remote_code or False,
         )
 

@@ -172,8 +176,10 @@ def load_model(
         )
         load_in_8bit = False
     elif is_llama_derived_model and "LlamaForCausalLM" in globals():
+        config = LlamaConfig.from_pretrained(base_model_config)
         model = LlamaForCausalLM.from_pretrained(
             base_model,
+            config=config,
             load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
             load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
             torch_dtype=torch_dtype,
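With the new first parameter, callers pass the tokenizer location explicitly instead of it being derived inside the function. A hedged usage sketch, not taken verbatim from the repo: the cfg stand-in assumes DictDefault returns empty values for unset keys, and "LlamaTokenizer" is simply one valid transformers tokenizer class name.

```python
from axolotl.utils.dict import DictDefault
from axolotl.utils.models import load_tokenizer

cfg = DictDefault({"trust_remote_code": False})  # minimal config stand-in

# Tokenizer files are resolved from the first argument (here the README's example path),
# independently of where the model weights are loaded from.
tokenizer = load_tokenizer("./llama-7b-hf", "LlamaTokenizer", cfg)
```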