Removed multiplicative / inverse stuff
app.py CHANGED
@@ -116,12 +116,12 @@ def download_and_upload_non_model_files(base_model_name, output_repo_name):
     shutil.rmtree(temp_config_dir, ignore_errors=True)

 def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo_name,
-                         scale_factor, multiplicative_lora, inverse_lora, progress=gr.Progress()):
+                         scale_factor, progress=gr.Progress()):
     temp_lora_dir = None
     try:
         # Validate scale factor
-        if not (
-            error_msg = "Scale factor must be in the range
+        if not (-2 <= scale_factor <= 2):
+            error_msg = "Scale factor must be in the range [-2, 2]"
             warning_fn(error_msg)
             return f"✗ Error: {error_msg}"


@@ -165,18 +165,6 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo

         info_fn(f"Found {len(shard_files)} model shards to process")

-        # Determine merge mode
-        if multiplicative_lora and inverse_lora:
-            merge_mode = "Multiplicative Inverse"
-        elif multiplicative_lora:
-            merge_mode = "Multiplicative"
-        elif inverse_lora:
-            merge_mode = "Additive Inverse"
-        else:
-            merge_mode = "Additive"
-
-        info_fn(f"Merge mode: {merge_mode}")
-
         merged_tensors = 0
         total_shards = len(shard_files)


@@ -214,7 +202,7 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo
                 lora_A, lora_B = find_lora_weights(lora_state, key)

                 if lora_A is not None and lora_B is not None:
-                    info_fn(f"Merging
+                    info_fn(f"Merging LoRA weights for {key}")
                     shard_merged_count += 1
                     merged_tensors += 1


@@ -223,32 +211,11 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo
                     tensor = tensor.to(torch.float32)
                     lora_delta = scale * lora_B.to(torch.float32) @ lora_A.to(torch.float32)

-                            raise ValueError(f"Multiplicative LoRA dimension mismatch for {key}: {lora_delta.shape} vs {tensor.shape}")
-
-                        if inverse_lora:
-                            # Inverse multiplicative: tensor = (I + lora_delta)^(-1) @ tensor
-                            identity = torch.eye(lora_delta.shape[0], device=lora_delta.device, dtype=torch.float32)
-                            inverse_matrix = torch.linalg.inv(identity + lora_delta)
-                            tensor = inverse_matrix @ tensor
-                        else:
-                            # Forward multiplicative: tensor = (I + lora_delta) @ tensor
-                            tensor += lora_delta @ tensor
-                    else:
-                        # Validate dimensions for additive LoRA
-                        if lora_delta.shape != tensor.shape:
-                            raise ValueError(f"Additive LoRA dimension mismatch for {key}: {lora_delta.shape} vs {tensor.shape}")
-
-                        if inverse_lora:
-                            # Inverse additive: tensor = tensor - lora_delta
-                            tensor -= lora_delta
-                        else:
-                            # Forward additive: tensor = tensor + lora_delta
-                            tensor += lora_delta
+                    # Validate dimensions for additive LoRA
+                    if lora_delta.shape != tensor.shape:
+                        raise ValueError(f"Additive LoRA dimension mismatch for {key}: {lora_delta.shape} vs {tensor.shape}")
+
+                    tensor += lora_delta

                     # Convert back to original dtype
                     tensor = tensor.to(original_dtype)


@@ -284,7 +251,7 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo

         progress(1.0, desc="Upload completed!")

-        success_msg = f"
+        success_msg = f"✓ Successfully merged and uploaded model!\nModel URL: https://huggingface.co/{output_repo_name}\nScale factor: {scale_factor}\nProcessed {total_shards} shards\nMerged {merged_tensors} layers with LoRA weights"
         info_fn("Merge completed successfully!")

         return success_msg


@@ -310,23 +277,21 @@ This tool merges LoRA (Low-Rank Adaptation) adapters with base models using a me
 - **Streaming Processing**: Downloads → Processes → Uploads → Deletes each shard sequentially
 - **Automatic Cleanup**: Temporary files are automatically removed after processing
 - **Progress Tracking**: Real-time status updates throughout the merge process
-- **Advanced Options**:
+- **Advanced Options**: Custom scale factors (including negative values)
 """

 DETAILS_TEXT = """
 ### How It Works
-LoRA enables efficient fine-tuning by adding small adapter weights rather than modifying the entire model. This tool
+LoRA enables efficient fine-tuning by adding small adapter weights rather than modifying the entire model. This tool applies additive merging:

 - **Additive LoRA**: `W_new = W + scale × B @ A`
-- **Multiplicative LoRA**: `W_new = W + scale × B @ A @ W`
-- **Multiplicative Inverse**: `W_new = (I + scale × B @ A)^(-1) @ W`
+- Negative scale values reverse the effect

 ### Scale Factor
-The scale factor (
+The scale factor (-2 ≤ scale ≤ 2) controls the strength of the LoRA merge:
 - **1.0**: Full strength (default)
 - **0.5**: Half strength
+- **-1.0**: Reverse effect (removes LoRA impact)

 ### Memory Efficiency
 - **Traditional approach**: Loads entire model (~15GB+ for 7B parameter models)


@@ -375,22 +340,12 @@ with gr.Blocks(title="Memory-Efficient LoRA Merge", theme=gr.themes.Soft()) as d

             gr.Markdown("### Advanced Options")
             scale_factor = gr.Slider(
-                minimum
-                maximum=
+                minimum=-2.0,
+                maximum=2.0,
                 value=1.0,
                 step=0.01,
                 label="Scale Factor",
-                info="Strength of LoRA merge (
-            )
-            multiplicative_lora = gr.Checkbox(
-                label="Multiplicative LoRA",
-                value=False,
-                info="Apply multiplicative LoRA instead of additive LoRA"
-            )
-            inverse_lora = gr.Checkbox(
-                label="Inverse Merge",
-                value=False,
-                info="Apply inverse operation (subtract/invert the LoRA effect)"
+                info="Strength of LoRA merge (-2 ≤ scale ≤ 2)"
             )

         with gr.Column(scale=1):


@@ -408,7 +363,7 @@ with gr.Blocks(title="Memory-Efficient LoRA Merge", theme=gr.themes.Soft()) as d
     submit_btn.click(
         fn=merge_lora_efficient,
         inputs=[hf_token, base_model_name, lora_model_name, output_repo_name,
-                scale_factor, multiplicative_lora, inverse_lora],
+                scale_factor],
         outputs=output_text
     )
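For reference, the additive rule this change keeps (`W_new = W + scale × B @ A`, per DETAILS_TEXT) reduces to a few lines of tensor math. The sketch below is illustrative only: the tensor names, shapes, and values are made up rather than taken from app.py, and it also shows why a negative scale reverses a previous merge, which is what the "-1.0: Reverse effect" bullet refers to.

# Minimal sketch of the additive merge applied to a single weight (illustrative, not app.py code).
import torch

out_dim, in_dim, rank = 32, 16, 4
W = torch.randn(out_dim, in_dim)       # hypothetical base weight
lora_A = torch.randn(rank, in_dim)     # LoRA "A" (down-projection)
lora_B = torch.randn(out_dim, rank)    # LoRA "B" (up-projection)
scale = 1.0

lora_delta = scale * lora_B @ lora_A   # must match W's shape, otherwise the merge is invalid
assert lora_delta.shape == W.shape

W_merged = W + lora_delta                               # additive merge: W_new = W + scale × B @ A
W_restored = W_merged + (-scale) * lora_B @ lora_A      # merging again with a negative scale undoes it
assert torch.allclose(W_restored, W, atol=1e-5)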
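The DESCRIPTION text in the diff ("Downloads → Processes → Uploads → Deletes each shard sequentially") describes a per-shard streaming loop. Here is a minimal sketch of that flow using the public huggingface_hub and safetensors APIs; the function and helper names (process_shards, merge_shard) and repo arguments are hypothetical, not the app's actual implementation.

# Sketch of shard streaming: only one shard is held locally at a time.
import os
from huggingface_hub import hf_hub_download, upload_file
from safetensors.torch import load_file, save_file

def process_shards(shard_files, base_repo, output_repo, token, merge_shard):
    for shard_name in shard_files:
        # Download a single shard from the base model repo.
        local_path = hf_hub_download(repo_id=base_repo, filename=shard_name)
        tensors = load_file(local_path)        # dict of tensor name -> torch.Tensor
        tensors = merge_shard(tensors)         # apply the additive LoRA update to matching tensors
        merged_path = f"merged-{shard_name}"
        save_file(tensors, merged_path)        # write the merged shard to a temporary file
        upload_file(path_or_fileobj=merged_path, path_in_repo=shard_name,
                    repo_id=output_repo, token=token)
        os.remove(merged_path)                 # clean up before moving to the next shard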