DZRobo committed
Commit c86b5e0 · 1 Parent(s): 7dd5fc1

per‑step CFG scheduling


Introduces per‑step CFG schedules (cosine, warmup, U‑shape) driven by log‑sigma progress; the schedules stack with the existing cfg_curve shaping.
Preset fine-tune.
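
For reference, the three shapes reduce to simple closed forms over the normalized log-sigma progress t ∈ [0, 1] (t ≈ 1 near sigma_max, t ≈ 0 near sigma_min). A minimal standalone sketch of the math this commit adds; the function name scheduled_cfg and its free-standing signature are illustrative, in the node the same expressions run inline per step:

import math

def scheduled_cfg(t, kind, cmin, cmax, gamma=1.5, u_pow=1.0):
    # t: normalized log-sigma progress in [0, 1]
    t = min(1.0, max(0.0, float(t)))
    if kind == "cosine":
        # half-cosine ramp between cmin (t=0) and cmax (t=1)
        return cmax - (cmax - cmin) * 0.5 * (1.0 + math.cos(math.pi * t))
    if kind in ("warmup", "warm-up", "linear"):
        # power-law ramp; gamma > 1 keeps CFG near cmin longer on the low-t side
        return cmin + (cmax - cmin) * t ** max(0.1, gamma)
    if kind in ("u", "u-shape", "ushape"):
        # edges high, middle low; u_pow controls concavity
        e = (4.0 * (t - 0.5) ** 2) ** max(0.1, u_pow)
        return cmin + (cmax - cmin) * min(1.0, e)
    return None  # "off" / unknown: caller keeps its current cond_scale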

Files changed (2)
  1. mod/easy/mg_cade25_easy.py +78 -71
  2. pressets/mg_cade25.cfg +36 -35
mod/easy/mg_cade25_easy.py CHANGED
@@ -1378,6 +1378,73 @@ def _wrap_model_with_guidance(model, guidance_mode: str, rescale_multiplier: flo
         except Exception:
             return None
 
+    # Compute effective cond scale before any branch, so schedules apply in all modes
+    cond_scale_eff = cond_scale
+    curve_gain = 1.0
+    if cfg_curve > 0.0 and (sigma is not None):
+        s = sigma
+        if s.ndim > 1:
+            s = s.flatten()
+        s_max = float(torch.max(s).item())
+        s_min = float(torch.min(s).item())
+        if sigma_seen["max"] is None:
+            sigma_seen["max"] = s_max
+            sigma_seen["min"] = s_min
+        else:
+            sigma_seen["max"] = max(sigma_seen["max"], s_max)
+            sigma_seen["min"] = min(sigma_seen["min"], s_min)
+        lo = max(1e-6, sigma_seen["min"])
+        hi = max(lo * (1.0 + 1e-6), sigma_seen["max"])
+        t = (torch.log(s + 1e-6) - torch.log(torch.tensor(lo, device=sigma.device))) / (torch.log(torch.tensor(hi, device=sigma.device)) - torch.log(torch.tensor(lo, device=sigma.device)) + 1e-6)
+        t = t.clamp(0.0, 1.0)
+        k = 6.0 * float(cfg_curve)
+        s_curve = torch.tanh((t - 0.5) * k)
+        g = 1.0 + 0.15 * float(cfg_curve) * s_curve
+        if g.ndim > 0:
+            g = g.mean().item()
+        curve_gain = float(g)
+        cond_scale_eff = cond_scale * curve_gain
+
+    if isinstance(cfg_sched_type, str) and cfg_sched_type.lower() != "off" and (sigma is not None):
+        try:
+            s = sigma
+            if s.ndim > 1:
+                s = s.flatten()
+            s_max = float(torch.max(s).item())
+            s_min = float(torch.min(s).item())
+            if sigma_seen["max"] is None:
+                sigma_seen["max"] = s_max
+                sigma_seen["min"] = s_min
+            else:
+                sigma_seen["max"] = max(sigma_seen["max"], s_max)
+                sigma_seen["min"] = min(sigma_seen["min"], s_min)
+            lo = max(1e-6, sigma_seen["min"])
+            hi = max(lo * (1.0 + 1e-6), sigma_seen["max"])
+            t = (torch.log(s + 1e-6) - torch.log(torch.tensor(lo, device=sigma.device))) / (torch.log(torch.tensor(hi, device=sigma.device)) - torch.log(torch.tensor(lo, device=sigma.device)) + 1e-6)
+            t = t.clamp(0.0, 1.0)
+            if t.ndim > 0:
+                t_val = float(t.mean().item())
+            else:
+                t_val = float(t.item())
+            cmin = float(max(0.0, cfg_sched_min))
+            cmax = float(max(cmin, cfg_sched_max))
+            tp = cfg_sched_type.lower()
+            if tp == "cosine":
+                import math
+                cfg_val = cmax - (cmax - cmin) * 0.5 * (1.0 + math.cos(math.pi * t_val))
+            elif tp in ("warmup", "warm-up", "linear"):
+                g = float(max(0.0, min(1.0, t_val))) ** float(max(0.1, cfg_sched_gamma))
+                cfg_val = cmin + (cmax - cmin) * g
+            elif tp in ("u", "u-shape", "ushape"):
+                e = 4.0 * (t_val - 0.5) * (t_val - 0.5)
+                e = float(min(1.0, max(0.0, e)))
+                e = e ** float(max(0.1, cfg_sched_u_pow))
+                cfg_val = cmin + (cmax - cmin) * e
+            else:
+                cfg_val = cond_scale_eff
+            cond_scale_eff = float(cfg_val) * float(curve_gain)
+        except Exception:
+            pass
     # Allow hybrid switch per-step
     mode = guidance_mode
     if guidance_mode == "ZeResFDG":
@@ -1448,7 +1515,7 @@ def _wrap_model_with_guidance(model, guidance_mode: str, rescale_multiplier: flo
         lg = _local_gain_for((cond.shape[-2], cond.shape[-1]))
         if lg is not None:
            resid = resid * lg.expand(-1, resid.shape[1], -1, -1)
-        noise_pred = uncond * alpha + cond_scale * resid
+        noise_pred = uncond * alpha + cond_scale_eff * resid
         return noise_pred
 
     # RescaleCFG/FDG path (with optional momentum/perp damping and S-curve shaping)
@@ -1496,76 +1563,7 @@ def _wrap_model_with_guidance(model, guidance_mode: str, rescale_multiplier: flo
         delta = delta * lg.expand(-1, delta.shape[1], -1, -1)
     cond = uncond + delta
 
-    cond_scale_eff = cond_scale
-    curve_gain = 1.0
-    if cfg_curve > 0.0 and (sigma is not None):
-        s = sigma
-        if s.ndim > 1:
-            s = s.flatten()
-        s_max = float(torch.max(s).item())
-        s_min = float(torch.min(s).item())
-        if sigma_seen["max"] is None:
-            sigma_seen["max"] = s_max
-            sigma_seen["min"] = s_min
-        else:
-            sigma_seen["max"] = max(sigma_seen["max"], s_max)
-            sigma_seen["min"] = min(sigma_seen["min"], s_min)
-        lo = max(1e-6, sigma_seen["min"])
-        hi = max(lo * (1.0 + 1e-6), sigma_seen["max"])
-        t = (torch.log(s + 1e-6) - torch.log(torch.tensor(lo, device=sigma.device))) / (torch.log(torch.tensor(hi, device=sigma.device)) - torch.log(torch.tensor(lo, device=sigma.device)) + 1e-6)
-        t = t.clamp(0.0, 1.0)
-        k = 6.0 * float(cfg_curve)
-        s_curve = torch.tanh((t - 0.5) * k)
-        g = 1.0 + 0.15 * float(cfg_curve) * s_curve
-        if g.ndim > 0:
-            g = g.mean().item()
-        curve_gain = float(g)
-        cond_scale_eff = cond_scale * curve_gain
-
-    # Per-step CFG schedule (cosine/warmup/U) using normalized sigma progress
-    if isinstance(cfg_sched_type, str) and cfg_sched_type.lower() != "off" and (sigma is not None):
-        try:
-            s = sigma
-            if s.ndim > 1:
-                s = s.flatten()
-            s_max = float(torch.max(s).item())
-            s_min = float(torch.min(s).item())
-            if sigma_seen["max"] is None:
-                sigma_seen["max"] = s_max
-                sigma_seen["min"] = s_min
-            else:
-                sigma_seen["max"] = max(sigma_seen["max"], s_max)
-                sigma_seen["min"] = min(sigma_seen["min"], s_min)
-            lo = max(1e-6, sigma_seen["min"])
-            hi = max(lo * (1.0 + 1e-6), sigma_seen["max"])
-            t = (torch.log(s + 1e-6) - torch.log(torch.tensor(lo, device=sigma.device))) / (torch.log(torch.tensor(hi, device=sigma.device)) - torch.log(torch.tensor(lo, device=sigma.device)) + 1e-6)
-            t = t.clamp(0.0, 1.0)
-            if t.ndim > 0:
-                t_val = float(t.mean().item())
-            else:
-                t_val = float(t.item())
-            cmin = float(max(0.0, cfg_sched_min))
-            cmax = float(max(cmin, cfg_sched_max))
-            tp = cfg_sched_type.lower()
-            if tp == "cosine":
-                import math
-                cfg_val = cmax - (cmax - cmin) * 0.5 * (1.0 + math.cos(math.pi * t_val))
-            elif tp in ("warmup", "warm-up", "linear"):
-                g = float(max(0.0, min(1.0, t_val))) ** float(max(0.1, cfg_sched_gamma))
-                cfg_val = cmin + (cmax - cmin) * g
-            elif tp in ("u", "u-shape", "ushape"):
-                # edges high, middle low; power to control concavity
-                e = 4.0 * (t_val - 0.5) * (t_val - 0.5)
-                e = float(min(1.0, max(0.0, e)))
-                e = e ** float(max(0.1, cfg_sched_u_pow))
-                cfg_val = cmin + (cmax - cmin) * e
-            else:
-                cfg_val = cond_scale_eff
-            # Keep curve shaping as a multiplier on top of scheduled absolute value
-            shape = (cond_scale_eff / float(cond_scale)) if float(cond_scale) != 0.0 else 1.0
-            cond_scale_eff = float(cfg_val) * float(shape)
-        except Exception:
-            pass
+
 
     # Epsilon scaling (exposure bias correction): early steps get multiplier closer to (1 + eps_scale)
     eps_mult = 1.0
@@ -2324,6 +2322,15 @@ class ComfyAdaptiveDetailEnhancer25:
            __cade_noop = 0  # ensure non-empty with-block
            # Latent buffer runtime state
            lb_state = {"z_ema": None, "anchor": None, "drift_last": None, "ref_dist_last": None}
+            # Pre-initialize EMA from the incoming latent so that a 2-iteration node already benefits on iter=1
+            try:
+                if bool(latent_buffer) and (iterations > 1):
+                    z0 = current_latent.get("samples", None)
+                    if isinstance(z0, torch.Tensor):
+                        lb_state["z_ema"] = z0.clone().detach()
+                        lb_state["anchor"] = z0.clone().detach()
+            except Exception:
+                pass
 
            # Preflight: reset sticky state and build external masks once (CPU-pinned)
            try:
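
Both the cfg_curve gain and the schedules share one progress term: sigma normalized in log space against the running min/max observed across steps. A hedged scalar sketch of that normalization (the helper name log_sigma_progress is illustrative; the node computes the same expression on tensors):

import math

def log_sigma_progress(sigma, seen_min, seen_max):
    # Running-range log normalization, as in both blocks above; the clamps
    # guard the first step, where seen_min == seen_max.
    lo = max(1e-6, seen_min)
    hi = max(lo * (1.0 + 1e-6), seen_max)
    t = (math.log(sigma + 1e-6) - math.log(lo)) / (math.log(hi) - math.log(lo) + 1e-6)
    return min(1.0, max(0.0, t))

The value actually applied is cond_scale_eff = cfg_val * curve_gain, so a schedule sets the absolute CFG level per step while cfg_curve keeps shaping it multiplicatively.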
pressets/mg_cade25.cfg CHANGED
@@ -6,7 +6,7 @@ seed: 0
 control_after_generate: randomize
 steps: 10
 cfg: 8.0
-denoise: 1.0
+denoise: 1.00
 sampler_name: ddim
 scheduler: MGHybrid
 iterations: 2
@@ -22,7 +22,7 @@ clip_clean: true
 latent_compare: true
 
 # latent buffer (internal)
-latent_buffer: true
+latent_buffer: false
 lb_inject: 0.25
 lb_ema: 0.75
 lb_every: 1
@@ -52,10 +52,10 @@ ref_cooldown: 2
 # cfg schedule (internal)
 #cfg_sched: off | cosine | warmup | u
 cfg_sched: warmup
-#cfg_sched_min: 4.0
-#cfg_sched_max: 8.0
+cfg_sched_min: 6.0
+cfg_sched_max: 8.8
 cfg_sched_gamma: 1.5
-#cfg_sched_u_pow: 1.0
+cfg_sched_u_pow: 1.0
 
 
 # guidance
@@ -138,9 +138,9 @@ aq_attn: true
 # core
 seed: 0
 control_after_generate: randomize
-steps: 8
+steps: 10
 cfg: 6.5
-denoise: 0.55
+denoise: 0.56
 sampler_name: ddim
 scheduler: MGHybrid
 iterations: 2
@@ -184,11 +184,11 @@ ref_cooldown: 2
 
 # cfg schedule (internal)
 #cfg_sched: off | cosine | warmup | u
-cfg_sched: cosine
-#cfg_sched_min: 3.0
-#cfg_sched_max: 6.5
-#cfg_sched_gamma: 1.5
-#cfg_sched_u_pow: 1.0
+cfg_sched: warmup
+cfg_sched_min: 4.0
+cfg_sched_max: 6.5
+cfg_sched_gamma: 1.5
+cfg_sched_u_pow: 1.0
 
 
 # guidance
@@ -271,13 +271,13 @@ aq_attn: true
 # core
 seed: 0
 control_after_generate: randomize
-steps: 10
-cfg: 5.0
-denoise: 0.40
+steps: 16
+cfg: 7.0
+denoise: 0.55
 sampler_name: ddim
 scheduler: MGHybrid
 iterations: 2
-steps_delta: 2.00
+steps_delta: 4.00
 cfg_delta: 0.03
 denoise_delta: 0.0500
 
@@ -296,7 +296,7 @@ latent_buffer: true
 lb_inject: 0.25
 lb_ema: 0.75
 lb_every: 1
-lb_anchor_every: 6
+lb_anchor_every: 4
 lb_masked: true
 lb_rebase_thresh: 0.10
 lb_rebase_rate: 0.25
@@ -322,8 +322,8 @@ ref_cooldown: 2
 # cfg schedule (internal)
 #cfg_sched: off | cosine | warmup | u
 cfg_sched: warmup
-cfg_sched_min: 4.5
-cfg_sched_max: 5.0
+cfg_sched_min: 5.8
+cfg_sched_max: 7.8
 cfg_sched_gamma: 1.5
 cfg_sched_u_pow: 1.2
 
@@ -346,10 +346,10 @@ use_zero_init: false
 zero_init_steps: 0
 
 # FDG / ZE thresholds
-fdg_low: 0.35
-fdg_high: 0.7
+fdg_low: 0.15
+fdg_high: 1.15
 fdg_sigma: 1.10
-ze_res_zero_steps: 12
+ze_res_zero_steps: 6
 ze_adaptive: true
 ze_r_switch_hi: 0.85
 ze_r_switch_lo: 0.25
@@ -397,7 +397,7 @@ midfreq_sigma_hi: 2.10
 # QSilk-AQClip-Lite (adaptive latent clipping)
 aqclip_enable: true
 aq_tile: 64
-aq_stride: 8
+aq_stride: 18
 aq_alpha: 2.0
 
 aq_attn: true
@@ -406,16 +406,16 @@ aq_attn: true
 # core
 seed: 0
 control_after_generate: randomize
-steps: 16
-cfg: 5.1
+steps: 20
+cfg: 7
 #0.75
-denoise: 0.49
+denoise: 0.50
 sampler_name: ddim
 scheduler: MGHybrid
 iterations: 2
 steps_delta: 2.00
 cfg_delta: 1.00
-denoise_delta: 0.10
+denoise_delta: 0.05
 
 # Smart seed toggle (disable for this step)
 smart_seed_enable: false
@@ -429,7 +429,7 @@ latent_compare: true
 
 # latent buffer (internal)
 latent_buffer: true
-lb_inject: 0.25
+lb_inject: 0.30
 lb_ema: 0.75
 lb_every: 1
 lb_anchor_every: 6
@@ -440,7 +440,8 @@ lb_rebase_rate: 0.25
 # detail controls
 ids_strength: 0.35
 upscale_method: lanczos
-scale_by: 1.5
+# 1.55
+scale_by: 1.50
 scale_delta: 0.1
 noise_offset: 0.0035
 threshold: 1.000
@@ -455,11 +456,11 @@ ref_cooldown: 2
 
 # cfg schedule (internal)
 #cfg_sched: off | cosine | warmup | u
-cfg_sched: cosine
-cfg_sched_min: 3.2
-cfg_sched_max: 5.6
+cfg_sched: warmup
+cfg_sched_min: 5.9
+cfg_sched_max: 7.0
 cfg_sched_gamma: 1.5
-cfg_sched_u_pow: 1.0
+cfg_sched_u_pow: 1.2
 
 
 # guidance
@@ -481,7 +482,7 @@ zero_init_steps: 0
 
 # FDG / ZE thresholds
 fdg_low: 0.35
-fdg_high: 1.15
+fdg_high: 0.90
 fdg_sigma: 1.20
 ze_res_zero_steps: 10
 ze_adaptive: true
@@ -512,7 +513,7 @@ clipseg_gain: 0.35
 clipseg_blend: fuse
 clipseg_ref_gate: true
 clipseg_ref_threshold: 0.005
-#seg_use_cf_edges: false
+seg_use_cf_edges: true
 
 # polish
 polish_enable: false
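
All four presets now pin cfg_sched_min/cfg_sched_max explicitly, but none of them exercises the U shape. An illustrative block using the same keys (values are placeholders, not shipped defaults):

# cfg schedule (internal)
#cfg_sched: off | cosine | warmup | u
cfg_sched: u
cfg_sched_min: 4.0
cfg_sched_max: 7.0
cfg_sched_gamma: 1.5
cfg_sched_u_pow: 1.2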