Merge pull request #90 from NanoCode012/feat/addict

Files changed:
- .github/workflows/tests.yml +3 -1
- requirements.txt +1 -1
- scripts/finetune.py +3 -3
- src/axolotl/utils/dict.py +10 -0
- src/axolotl/utils/models.py +5 -5
- tests/test_dict.py +90 -0
.github/workflows/tests.yml CHANGED

```diff
@@ -1,5 +1,7 @@
 name: PyTest
-on:
+on:
+  push:
+  pull_request:
 
 jobs:
   test:
```
requirements.txt CHANGED

```diff
@@ -1,7 +1,7 @@
 peft @ git+https://github.com/huggingface/peft.git
 transformers @ git+https://github.com/huggingface/transformers.git
 bitsandbytes>=0.39.0
-attrdict
+addict
 fire
 PyYAML==6.0
 black
```
scripts/finetune.py CHANGED

```diff
@@ -10,11 +10,11 @@ from typing import Optional, List, Dict, Any, Union
 import fire
 import torch
 import yaml
-from attrdict import AttrDefault
 
 # add src to the pythonpath so we don't need to pip install this
 from axolotl.utils.tokenization import check_dataset_labels
 from axolotl.utils.validation import validate_config
+from axolotl.utils.dict import DictDefault
 
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 src_dir = os.path.join(project_root, "src")
@@ -131,10 +131,10 @@ def train(
 
     # load the config from the yaml file
     with open(config, "r") as f:
-        cfg: AttrDefault = AttrDefault(yaml.load(f, Loader=yaml.Loader))
+        cfg: DictDefault = DictDefault(yaml.load(f, Loader=yaml.Loader))
     # if there are any options passed in the cli, if it is something that seems valid from the yaml,
     # then overwrite the value
-    cfg_keys = dict(cfg).keys()
+    cfg_keys = cfg.keys()
     for k in kwargs:
         # if not strict, allow writing to cfg even if it's not in the yml already
         if k in cfg_keys or cfg.strict is False:
```
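This override loop is where DictDefault's behavior shows up: a key like `strict` that is absent from the YAML reads back as None, so `cfg.strict is False` only passes when the config explicitly sets `strict: false`. A minimal sketch of that semantics (not part of the PR; `base_model` and `learning_rate` are illustrative key names, and axolotl's `src/` is assumed importable):

```python
# Sketch of the CLI-override logic above, under the assumption that
# "strict" is simply not set in the YAML config.
import yaml

from axolotl.utils.dict import DictDefault

cfg = DictDefault(yaml.safe_load("base_model: foo\n"))
kwargs = {"learning_rate": 1e-4}

for k in kwargs:
    # "learning_rate" is not in the YAML and cfg.strict is None (not False),
    # so the CLI value is silently ignored.
    if k in cfg.keys() or cfg.strict is False:
        cfg[k] = kwargs[k]

print(cfg.learning_rate)  # None -- the override was not applied
```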
src/axolotl/utils/dict.py ADDED

```diff
@@ -0,0 +1,10 @@
+from addict import Dict
+
+
+class DictDefault(Dict):
+    """
+    A Dict that returns None instead of returning empty Dict for missing keys.
+    """
+
+    def __missing__(self, key):
+        return None
```
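The entire change hinges on this one `__missing__` override. A short sketch of the difference from plain addict (illustrative key names; assumes `addict` is installed and `src/` is on the path):

```python
from addict import Dict

from axolotl.utils.dict import DictDefault  # the class added above

plain = Dict({"load_in_8bit": True})
cfg = DictDefault({"load_in_8bit": True})

print(plain.lora_r)  # {}   -- addict returns an empty child Dict for missing keys
print(cfg.lora_r)    # None -- DictDefault returns None instead

# None is falsy and compares cleanly with `is`, so config checks such as
# `cfg.strict is False` or `if cfg.gradient_checkpointing:` behave as expected.
```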
src/axolotl/utils/models.py CHANGED

```diff
@@ -29,7 +29,7 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
 
 if TYPE_CHECKING:
     from peft import PeftModel, PeftConfig
-    from attrdict import AttrDefault
+    from axolotl.utils.dict import DictDefault
     from transformers import PreTrainedTokenizer
 
 
@@ -79,7 +79,7 @@ def load_model(
     adapter="lora",
     inference=False,
 ):
-    # type: (str, str, str, str, AttrDefault, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
+    # type: (str, str, str, str, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
 
     # TODO refactor as a kwarg
     load_in_8bit = cfg.load_in_8bit
@@ -294,7 +294,7 @@ def load_model(
 
 
 def load_adapter(model, cfg, adapter):
-    # type: (PreTrainedModel, AttrDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
 
     if adapter is None:
         return model, None
@@ -307,7 +307,7 @@ def load_adapter(model, cfg, adapter):
 
 
 def load_llama_adapter(model, cfg):
-    # type: (PreTrainedModel, AttrDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     from peft import (
         AdaptionPromptConfig,
         get_peft_model,
@@ -355,7 +355,7 @@ def find_all_linear_names(bits, model):
 
 
 def load_lora(model, cfg):
-    # type: (PreTrainedModel, AttrDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
 
     from peft import (
         LoraConfig,
```
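A side note on these signatures: they use comment-style type hints so the peft and transformers types only need to exist under `TYPE_CHECKING` and are never imported at runtime. A rough sketch of the inline-annotation equivalent for `load_lora` (an illustration, not part of the PR; string annotations keep the imports type-checking-only):

```python
from typing import TYPE_CHECKING, Optional, Tuple

if TYPE_CHECKING:
    from peft import PeftConfig
    from transformers import PreTrainedModel

    from axolotl.utils.dict import DictDefault


def load_lora(
    model: "PreTrainedModel", cfg: "DictDefault"
) -> "Tuple[PreTrainedModel, Optional[PeftConfig]]":
    ...  # body unchanged; only the hint style differs
```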
tests/test_dict.py ADDED

```diff
@@ -0,0 +1,90 @@
+import unittest
+
+import pytest
+
+from axolotl.utils.dict import DictDefault
+
+
+class DictDefaultTest(unittest.TestCase):
+    def test_dict_default(self):
+        cfg = DictDefault(
+            {
+                "key_a": {"key_b": "value_a"},
+                "key_c": "value_c",
+                "key_d": ["value_d", "value_e"],
+            }
+        )
+
+        assert (
+            cfg.key_a.key_b == "value_a"
+        ), "DictDefault should return value for existing nested keys"
+
+        assert (
+            cfg.key_c == "value_c"
+        ), "DictDefault should return value for existing keys"
+
+        assert (
+            cfg.key_d[0] == "value_d"
+        ), "DictDefault should return value for existing keys in list"
+
+        assert (
+            "value_e" in cfg.key_d
+        ), "DictDefault should support in operator for existing keys in list"
+
+    def test_dict_or_operator(self):
+        cfg = DictDefault(
+            {
+                "key_a": {"key_b": "value_a"},
+                "key_c": "value_c",
+                "key_d": ["value_d", "value_e"],
+                "key_f": "value_f",
+            }
+        )
+
+        cfg = cfg | DictDefault({"key_a": {"key_b": "value_b"}, "key_f": "value_g"})
+
+        assert (
+            cfg.key_a.key_b == "value_b"
+        ), "DictDefault should support OR operator for existing nested keys"
+
+        assert cfg.key_c == "value_c", "DictDefault should not delete existing key"
+
+        assert cfg.key_d == [
+            "value_d",
+            "value_e",
+        ], "DictDefault should not overwrite existing keys in list"
+
+        assert (
+            cfg.key_f == "value_g"
+        ), "DictDefault should support OR operator for existing key"
+
+    def test_dict_missingkey(self):
+        cfg = DictDefault({})
+
+        assert cfg.random_key is None, "DictDefault should return None for missing keys"
+
+    def test_dict_nested_missingparentkey(self):
+        """
+        Due to subclassing Dict, DictDefault will error if we try to access a nested key whose parent key does not exist.
+        """
+        cfg = DictDefault({})
+
+        with pytest.raises(
+            AttributeError,
+            match=r"'NoneType' object has no attribute 'another_random_key'",
+        ):
+            cfg.random_key.another_random_key
+
+    def test_dict_shorthand_assignment(self):
+        """
+        Shorthand assignment is said to not be supported if subclassed. However, their example raises error instead of None.
+        This test ensures that it is supported for current implementation.
+
+        Ref: https://github.com/mewwts/addict#default-values
+        """
+
+        cfg = DictDefault({"key_a": {"key_b": "value_a"}})
+
+        cfg.key_a.key_b = "value_b"
+
+        assert cfg.key_a.key_b == "value_b", "Shorthand assignment should be supported"
```