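"""Tests for LoRA variants (DoRA and aLoRA): variant dispatch per layer type, DoRA gradient flow, and aLoRA
invocation-offset handling for forward and generate."""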
import pytest
import torch
from torch import nn

from peft import LoraConfig, get_peft_model
from peft.tuners.lora.layer import Conv1d as LoraConv1d
from peft.tuners.lora.layer import Conv2d as LoraConv2d
from peft.tuners.lora.layer import Embedding as LoraEmbedding
from peft.tuners.lora.layer import Linear as LoraLinear
from peft.tuners.lora.variants import (
    ALoraLinearVariant,
    DoraConv1dVariant,
    DoraConv2dVariant,
    DoraEmbeddingVariant,
    DoraLinearVariant,
    calculate_alora_offsets,
    get_alora_offsets_for_forward,
    get_alora_offsets_for_generate,
)


class CustomModel(nn.Module):
    """PyTorch module that contains common targetable layers (linear, embedding, conv, ...)."""

    def __init__(self, num_embeddings=100, embedding_dim=16, num_classes=10):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.conv1d = nn.Conv1d(in_channels=embedding_dim, out_channels=32, kernel_size=3, padding=1)
        self.conv2d = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.flatten = nn.Flatten()
        self.dummy_conv1d_output_dim = 32 * 10
        self.dummy_conv2d_output_dim = 16 * 10 * 10
        self.linear1 = nn.Linear(self.dummy_conv1d_output_dim + self.dummy_conv2d_output_dim, 64)
        self.linear2 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()

    def forward(self, input_ids, dummy_image_input):
        # Token branch: embed the ids and run a Conv1d over the sequence dimension.
        x1 = self.embedding(input_ids)
        x1 = x1.transpose(1, 2)
        x1 = self.relu(self.conv1d(x1))
        x1_flat = self.flatten(x1)

        # Image branch: run a Conv2d over the dummy image input.
        x2 = self.relu(self.conv2d(dummy_image_input))
        x2_flat = self.flatten(x2)

        # Concatenate both branches and classify.
        output = self.relu(self.linear1(torch.concat([x1_flat, x2_flat], dim=1)))
        output = self.linear2(output)
        return output


class DummyLM(nn.Module):
    """Minimal LM stand-in: embeds token ids and projects them to vocab-size logits."""

    def __init__(self, vocab_size: int = 10, hidden_dim: int = 8):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, X=None, embeds=None, num_beams=None, alora_offsets=None):
        # num_beams and alora_offsets are accepted so that the aLoRA tests can pass them through.
        if X is not None:
            embeds = self.embed(X)
        return self.linear(embeds)


class MockTransformerWrapper:
    """Mock class to behave like a transformers model.

    This is needed because the tests initialize the model by calling transformers_class.from_pretrained.
    """

    @classmethod
    def from_pretrained(cls):
        torch.manual_seed(0)
        torch_dtype = torch.float32
        return DummyLM().to(torch_dtype)
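# Expected variant class per LoRA layer type, keyed by variant name; used by test_variant_is_applied_to_layers.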
VARIANT_MAP = {
    "dora": {
        LoraLinear: DoraLinearVariant,
        LoraEmbedding: DoraEmbeddingVariant,
        LoraConv1d: DoraConv1dVariant,
        LoraConv2d: DoraConv2dVariant,
    },
    "alora": {
        LoraLinear: ALoraLinearVariant,
    },
}
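# (variant_name, config_cls, config_kwargs) triples consumed by the parametrized tests in TestLoraVariants.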
TEST_CASES = [
    (
        "dora",
        LoraConfig,
        {"target_modules": ["linear1", "linear2", "conv1d", "conv2d", "embedding"], "use_dora": True},
    ),
    (
        "alora",
        LoraConfig,
        {"target_modules": ["linear1", "linear2"], "alora_invocation_tokens": [1]},
    ),
]
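# Tests that the LoRA variants are wired into the PEFT model correctly and that their extra parameters train.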
class TestLoraVariants:
    @pytest.mark.parametrize("variant_name, config_cls, config_kwargs", TEST_CASES)
    def test_variant_is_applied_to_layers(self, variant_name, config_cls, config_kwargs):
        base_model = CustomModel()
        peft_config = config_cls(**config_kwargs)
        peft_model = get_peft_model(base_model, peft_config)

        layer_type_map = VARIANT_MAP[variant_name]

        for _, module in peft_model.named_modules():
            if not hasattr(module, "lora_variant"):
                continue

            # Not every variant supports every layer type (e.g., aLoRA only maps Linear layers), so skip the rest.
            expected_variant_type = layer_type_map.get(type(module), None)
            if not expected_variant_type:
                continue

            assert isinstance(module.lora_variant["default"], expected_variant_type)

    def custom_model_with_loss_backpropagated(self, peft_config):
        """Returns the CustomModel + PEFT model instance with a dummy loss that was backpropagated once."""
        base_model = CustomModel()
        peft_model = get_peft_model(base_model, peft_config)

        x, y = torch.ones(10, 10).long(), torch.ones(10, 1, 10, 10)
        out = peft_model(x, y)
        loss = out.sum()
        loss.backward()

        return base_model, peft_model

    def test_dora_params_have_gradients(self):
        """Ensure that the parameters added by the DoRA variant are participating in the output computation."""
        layer_names = ["linear1", "linear2", "conv1d", "conv2d", "embedding"]
        peft_config = LoraConfig(target_modules=layer_names, use_dora=True)
        base_model, peft_model = self.custom_model_with_loss_backpropagated(peft_config)

        for layer in layer_names:
            assert getattr(peft_model.base_model.model, layer).lora_magnitude_vector["default"].weight.grad is not None
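# aLoRA ("activated LoRA") only applies the adapter from the last occurrence of the configured invocation token
# sequence onwards. calculate_alora_offsets returns, per sequence, the number of trailing tokens the adapter
# should affect, or None when the invocation sequence is absent or not configured.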
class TestActivatedLora:
    @pytest.mark.parametrize(
        "input_ids, alora_invocation_tokens, expected_offsets",
        [
            ([[0, 1, 2, 3], [0, 4, 5, 6]], [1, 2], [3, None]),
            ([[1, 2, 1, 2], [0, 4, 1, 2]], [1, 2], [2, 2]),
            ([[1, 2, 3, 4], [0, 4, 1, 4]], [1, 2], [4, None]),
            ([[1, 2, 3, 4]], None, [None]),
        ],
    )
    def test_calculate_alora_offsets(self, input_ids, alora_invocation_tokens, expected_offsets):
        config = LoraConfig(alora_invocation_tokens=alora_invocation_tokens)
        peft_config = {"default": config}

        offsets = calculate_alora_offsets(peft_config, "default", torch.tensor(input_ids))

        assert offsets == expected_offsets

    @pytest.mark.parametrize(
        "input_ids, alora_invocations, expected_offsets",
        [
            ([[0, 1, 1], [0, 2, 2]], {"a1": [1], "a2": [2]}, [1, 1]),
            ([[0, 1, 1], [0, 2, 2]], {"a1": [1], "a2": None}, [1, None]),
        ],
    )
    def test_calculate_alora_offsets_with_adapter_names(self, input_ids, alora_invocations, expected_offsets):
        peft_config = {}
        for alora_name in alora_invocations.keys():
            peft_config[alora_name] = LoraConfig(alora_invocation_tokens=alora_invocations[alora_name])

        adapter_names = list(alora_invocations.keys())
        offsets = calculate_alora_offsets(
            peft_config, adapter_names[0], torch.tensor(input_ids), adapter_names=adapter_names
        )

        assert offsets == expected_offsets
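    # Before the invocation token (position 2) the adapted output must match the base model; from the invocation
    # onwards it must differ because the aLoRA weights are active there.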
    def test_alora_activation_matches_base_until_invocation(self):
        transformers_class = MockTransformerWrapper
        base_model = transformers_class.from_pretrained()
        cfg = LoraConfig(target_modules=["linear"], alora_invocation_tokens=[2], init_lora_weights=False)
        lora_model = get_peft_model(base_model, cfg)
        lora_model.eval()

        input_ids = torch.tensor([[0, 1, 2, 3]])
        start = 2
        with lora_model.disable_adapter():
            with torch.no_grad():
                base_out = lora_model(X=input_ids)

        kwargs = get_alora_offsets_for_forward(lora_model, input_ids)
        with torch.no_grad():
            lora_out = lora_model(X=input_ids, **kwargs)

        assert torch.allclose(lora_out[:, :start], base_out[:, :start])
        assert not torch.allclose(lora_out[:, start:], base_out[:, start:])
    def test_input_embeds_warning(self):
        transformers_class = MockTransformerWrapper
        base_model = transformers_class.from_pretrained()
        cfg = LoraConfig(target_modules=["linear"], alora_invocation_tokens=[2], init_lora_weights=False)
        lora_model = get_peft_model(base_model, cfg)
        lora_model.eval()

        input_ids = torch.tensor([[0, 1, 2, 3]])
        input_embeds = base_model.embed(input_ids)
        with pytest.warns(
            UserWarning,
            match="Cannot calculate aLoRA offsets when only inputs_embeds are provided. Disabling aLoRA for this forward pass.",
        ):
            kwargs = get_alora_offsets_for_forward(lora_model, inputs_embeds=input_embeds)
        assert kwargs.get("alora_offsets") is None
        with pytest.warns(
            UserWarning,
            match="Cannot calculate aLoRA offsets during generate as input_ids are not available. Disabling aLoRA.",
        ):
            kwargs = get_alora_offsets_for_generate(lora_model, inputs_embeds=input_embeds)
        assert kwargs.get("alora_offsets") is None
    def test_num_beams_error(self):
        transformers_class = MockTransformerWrapper
        base_model = transformers_class.from_pretrained()
        cfg = LoraConfig(target_modules=["linear"], alora_invocation_tokens=[2], init_lora_weights=False)
        lora_model = get_peft_model(base_model, cfg)
        lora_model.eval()

        input_ids = torch.tensor([[0, 1, 2, 3]])
        with pytest.raises(ValueError) as e:
            with torch.no_grad():
                lora_model(X=input_ids, num_beams=2, alora_offsets=[3])
        assert "Beam search not yet supported for aLoRA." in str(e.value)