Commit
·
db001c3
1
Parent(s):
169ed8c
yet another _append_model_chunk_and_spool rewrite to fix silence gaps
Browse files- jam_worker.py +138 -106
jam_worker.py
CHANGED
|
@@ -445,24 +445,52 @@ class JamWorker(threading.Thread):
|
|
| 445 |
|
| 446 |
def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
|
| 447 |
"""
|
| 448 |
-
|
| 449 |
-
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
# ---- unpack model-rate samples ----
|
| 460 |
s = wav.samples.astype(np.float32, copy=False)
|
| 461 |
if s.ndim == 1:
|
| 462 |
s = s[:, None]
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
| 466 |
|
| 467 |
# crossfade length in model samples
|
| 468 |
try:
|
|
@@ -471,110 +499,114 @@ class JamWorker(threading.Thread):
|
|
| 471 |
xfade_s = 0.0
|
| 472 |
xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
|
| 473 |
|
| 474 |
-
#
|
| 475 |
-
|
| 476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
# ------------------------------------------
|
| 479 |
-
# (A)
|
| 480 |
# ------------------------------------------
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
if
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
else:
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
self._spool_written += y_m.shape[0]
|
| 516 |
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
else:
|
| 523 |
-
# First-ever call or too-short to mix: maintain _model_stream minimally
|
| 524 |
-
if xfade_n > 0 and n_samps > xfade_n:
|
| 525 |
-
self._model_stream = s[xfade_n:].copy() if self._model_stream is None else np.concatenate([self._model_stream, s[xfade_n:]], axis=0)
|
| 526 |
-
else:
|
| 527 |
-
self._model_stream = s.copy() if self._model_stream is None else np.concatenate([self._model_stream, s], axis=0)
|
| 528 |
|
| 529 |
# ------------------------------------------
|
| 530 |
-
# (B)
|
| 531 |
# ------------------------------------------
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
body = s[xfade_n:, :]
|
| 546 |
-
print(f"[model] body(S) len={body.shape[0]} rms={_dbg_rms_dbfs_model(body):+.1f} dBFS")
|
| 547 |
-
y_body = to_target(body.astype(np.float32))
|
| 548 |
-
if y_body.size:
|
| 549 |
-
# DEBUG: body RMS in short-chunk path
|
| 550 |
-
print(f"[append] body(len=short) len={y_body.shape[0]} rms={_dbg_rms_dbfs(y_body):+.1f} dBFS")
|
| 551 |
-
self._spool = np.concatenate([self._spool, y_body], axis=0) if self._spool.size else y_body
|
| 552 |
-
self._spool_written += y_body.shape[0]
|
| 553 |
-
# No tail to remember this round
|
| 554 |
-
self._pending_tail_model = None
|
| 555 |
-
self._pending_tail_target_len = 0
|
| 556 |
-
return
|
| 557 |
-
|
| 558 |
-
# Tail (always remember how many TARGET samples we append)
|
| 559 |
-
if xfade_n > 0 and n_samps >= xfade_n:
|
| 560 |
-
tail = s[-xfade_n:, :]
|
| 561 |
-
print(f"[model] tail len={tail.shape[0]} rms={_dbg_rms_dbfs_model(tail):+.1f} dBFS")
|
| 562 |
-
y_tail = to_target(tail.astype(np.float32))
|
| 563 |
-
Ltail = int(y_tail.shape[0])
|
| 564 |
-
if Ltail:
|
| 565 |
-
# DEBUG: tail RMS we are appending now (to be corrected next call)
|
| 566 |
-
print(f"[append] tail len={y_tail.shape[0]} rms={_dbg_rms_dbfs(y_tail):+.1f} dBFS")
|
| 567 |
-
self._spool = np.concatenate([self._spool, y_tail], axis=0) if self._spool.size else y_tail
|
| 568 |
-
self._spool_written += Ltail
|
| 569 |
-
self._pending_tail_model = tail.copy()
|
| 570 |
-
self._pending_tail_target_len = Ltail
|
| 571 |
-
else:
|
| 572 |
-
# Nothing appended (resampler returned nothing yet) — keep model tail but mark zero target len
|
| 573 |
-
self._pending_tail_model = tail.copy()
|
| 574 |
-
self._pending_tail_target_len = 0
|
| 575 |
else:
|
| 576 |
-
|
| 577 |
self._pending_tail_target_len = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
|
| 579 |
|
| 580 |
|
|
|
|
| 445 |
|
| 446 |
def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
|
| 447 |
"""
|
| 448 |
+
Append one MagentaRT chunk into the target-SR spool with an energy-aware,
|
| 449 |
+
deferred-overwrite crossfade to avoid writing near-silence at bar edges.
|
| 450 |
+
|
| 451 |
+
Key behavior:
|
| 452 |
+
- Append BODY and TAIL of *this* chunk right away (resampled to target SR).
|
| 453 |
+
- Keep THIS chunk's model-rate TAIL (+ its target-SR length if appended) to repair the
|
| 454 |
+
previous boundary on the *next* call by mixing (prev_tail*cos + new_head*sin).
|
| 455 |
+
- When the correction length Lpop would be 0 (e.g., tail produced no target samples last time),
|
| 456 |
+
we APPEND the mixed-overlap to bridge the gap instead of overwriting 0 samples.
|
| 457 |
+
- Before overwriting/appending the mixed-overlap, we guard against writing ultra-quiet audio
|
| 458 |
+
by normalizing it up (bounded) if it's >20 dB below the existing spool end.
|
| 459 |
+
|
| 460 |
+
This keeps your bar clock and external timing the same, but removes "bad starts" and fizzles.
|
| 461 |
"""
|
| 462 |
+
import math
|
| 463 |
+
import numpy as np
|
| 464 |
+
|
| 465 |
+
# ---- helpers ----
|
| 466 |
+
def _rms_dbfs(x: np.ndarray) -> float:
    """Return the RMS level of ``x`` in dBFS, where full scale is 1.0.

    The level is measured over every sample of every channel. Channels are
    deliberately NOT averaged to mono first: a mono downmix cancels
    anti-phase stereo content (L == -R) and would report clearly audible
    audio as silence, which in turn would trigger a spurious gain lift in
    the caller's energy guard. For 1-D input, single-channel input, and
    stereo input whose channels are identical, the result matches the
    mono-downmix measurement.

    Args:
        x: Audio samples, either 1-D or 2-D (frames x channels).

    Returns:
        The level in dBFS. Empty and all-zero input both yield -120.0,
        which is the floor of this measurement.
    """
    if x.size == 0:
        return -120.0

    # Sanitize BEFORE any reduction so a single NaN/Inf sample cannot wipe
    # out the valid samples that share its frame. float64 keeps the
    # mean-square accumulation accurate for long buffers.
    samples = np.nan_to_num(
        x.astype(np.float64), nan=0.0, posinf=0.0, neginf=0.0
    )
    mean_square = float(np.mean(samples * samples, dtype=np.float64))

    # The 1e-12 power floor corresponds to -120 dBFS and keeps log10 finite
    # for digital silence. 10*log10(power) == 20*log10(rms).
    return 10.0 * math.log10(mean_square + 1e-12)
|
| 477 |
+
|
| 478 |
+
def _rms_dbfs_model(x: np.ndarray) -> float:
    """Return the dBFS RMS level of ``x`` by delegating to ``_rms_dbfs``.

    The measurement is exactly the one ``_rms_dbfs`` performs; this
    separate name exists only so call sites and log lines can be read
    more clearly.
    """
    level_db = _rms_dbfs(x)
    return level_db
|
| 481 |
+
|
| 482 |
+
def to_target(y: np.ndarray) -> np.ndarray:
    """Pass ``y`` through ``self._rs`` when one is configured.

    Returns ``y`` itself, untouched, when ``self._rs`` is None. Otherwise
    returns whatever ``self._rs.process(y, final=False)`` produces.
    NOTE(review): ``self._rs`` is presumably a streaming resampler and
    ``final=False`` presumably signals that more input will follow --
    confirm against where ``_rs`` is constructed.
    """
    resampler = self._rs
    if resampler is None:
        return y
    return resampler.process(y, final=False)
|
| 484 |
|
| 485 |
# ---- unpack model-rate samples ----
|
| 486 |
s = wav.samples.astype(np.float32, copy=False)
|
| 487 |
if s.ndim == 1:
|
| 488 |
s = s[:, None]
|
| 489 |
+
if s.shape[1] == 1:
|
| 490 |
+
# ensure stereo shape for consistency with your spool (S,2)
|
| 491 |
+
s = np.repeat(s, 2, axis=1)
|
| 492 |
+
|
| 493 |
+
n_samps = int(s.shape[0])
|
| 494 |
|
| 495 |
# crossfade length in model samples
|
| 496 |
try:
|
|
|
|
| 499 |
xfade_s = 0.0
|
| 500 |
xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
|
| 501 |
|
| 502 |
+
# carve head/body/tail in model domain
|
| 503 |
+
if xfade_n > 0 and n_samps >= (2 * xfade_n):
|
| 504 |
+
head_m = s[:xfade_n, :]
|
| 505 |
+
body_m = s[xfade_n:n_samps - xfade_n, :]
|
| 506 |
+
tail_m = s[n_samps - xfade_n:, :]
|
| 507 |
+
else:
|
| 508 |
+
# too short or no xfade configured — treat everything as body
|
| 509 |
+
head_m = np.zeros((0, 2), dtype=np.float32)
|
| 510 |
+
body_m = s
|
| 511 |
+
tail_m = np.zeros((0, 2), dtype=np.float32)
|
| 512 |
|
| 513 |
# ------------------------------------------
|
| 514 |
+
# (A) Repair the PREVIOUS boundary if we have a pending model-tail
|
| 515 |
# ------------------------------------------
|
| 516 |
+
did_boundary_mix = False
|
| 517 |
+
if (self._pending_tail_model is not None) and (xfade_n > 0) and (n_samps >= xfade_n):
|
| 518 |
+
# adaptive crossfade length when either side is very quiet
|
| 519 |
+
tail_prev_m = self._pending_tail_model
|
| 520 |
+
head_now_m = head_m
|
| 521 |
+
|
| 522 |
+
# safety: match shapes
|
| 523 |
+
if tail_prev_m.shape[1] != 2:
|
| 524 |
+
if tail_prev_m.ndim == 1:
|
| 525 |
+
tail_prev_m = tail_prev_m[:, None]
|
| 526 |
+
tail_prev_m = np.repeat(tail_prev_m[:, :1], 2, axis=1)
|
| 527 |
+
if head_now_m.shape[1] != 2:
|
| 528 |
+
if head_now_m.ndim == 1:
|
| 529 |
+
head_now_m = head_now_m[:, None]
|
| 530 |
+
head_now_m = np.repeat(head_now_m[:, :1], 2, axis=1)
|
| 531 |
+
|
| 532 |
+
# compute energy to decide whether to shorten xfade
|
| 533 |
+
tail_r = _rms_dbfs_model(tail_prev_m)
|
| 534 |
+
head_r = _rms_dbfs_model(head_now_m)
|
| 535 |
+
xfade_use = int(xfade_n)
|
| 536 |
+
if min(tail_r, head_r) < -45.0:
|
| 537 |
+
xfade_use = max(1, xfade_n // 4)
|
| 538 |
+
|
| 539 |
+
# windowed overlap (model domain)
|
| 540 |
+
Lm = min(xfade_use, tail_prev_m.shape[0], head_now_m.shape[0])
|
| 541 |
+
if Lm > 0:
|
| 542 |
+
t = np.linspace(0.0, math.pi / 2.0, Lm, endpoint=False, dtype=np.float32)[:, None]
|
| 543 |
+
cosw = np.cos(t, dtype=np.float32)
|
| 544 |
+
sinw = np.sin(t, dtype=np.float32)
|
| 545 |
+
mixed_m = tail_prev_m[-Lm:, :] * cosw + head_now_m[:Lm, :] * sinw
|
| 546 |
+
|
| 547 |
+
# resample to target and correct the end of the spool
|
| 548 |
+
y_mixed = to_target(mixed_m)
|
| 549 |
+
Lcorr = int(y_mixed.shape[0])
|
| 550 |
+
|
| 551 |
+
if Lcorr > 0:
|
| 552 |
+
# how many samples from last time's tail did we append?
|
| 553 |
+
# (may be zero if resampler yielded nothing then)
|
| 554 |
+
Lpop = int(min(self._pending_tail_target_len, self._spool.shape[0], Lcorr))
|
| 555 |
+
|
| 556 |
+
if Lpop > 0:
|
| 557 |
+
# energy-aware overwrite of last Lpop samples
|
| 558 |
+
prev_end = self._spool[-Lpop:, :]
|
| 559 |
+
new_seg = y_mixed[-Lpop:, :]
|
| 560 |
+
|
| 561 |
+
prev_r = _rms_dbfs(prev_end)
|
| 562 |
+
new_r = _rms_dbfs(new_seg)
|
| 563 |
+
|
| 564 |
+
# If the new overlap is >20 dB quieter than what's there, lift it (bounded)
|
| 565 |
+
if new_r < (prev_r - 20.0):
|
| 566 |
+
lift_db = max(0.0, min(20.0, (prev_r - 6.0) - new_r)) # cap boost; leave ~6 dB headroom
|
| 567 |
+
scale = 10.0 ** (lift_db / 20.0)
|
| 568 |
+
new_seg = np.clip(new_seg * scale, -1.0, 1.0).astype(np.float32, copy=False)
|
| 569 |
+
|
| 570 |
+
self._spool[-Lpop:, :] = new_seg
|
| 571 |
+
print(f"[append] mixedOverlap len={Lpop} rms={_rms_dbfs(new_seg):+.1f} dBFS")
|
| 572 |
else:
|
| 573 |
+
# Nothing to overwrite (e.g., last tail produced 0 target samples).
|
| 574 |
+
# Bridge by APPENDING the mixed-overlap.
|
| 575 |
+
self._spool = np.concatenate([self._spool, y_mixed], axis=0)
|
| 576 |
+
self._spool_written += int(y_mixed.shape[0])
|
| 577 |
+
print(f"[append] mixedOverlap len={y_mixed.shape[0]} rms={_rms_dbfs(y_mixed):+.1f} dBFS")
|
|
|
|
| 578 |
|
| 579 |
+
did_boundary_mix = True
|
| 580 |
+
|
| 581 |
+
# clear pending once we attempted the repair
|
| 582 |
+
self._pending_tail_model = None
|
| 583 |
+
self._pending_tail_target_len = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
# ------------------------------------------
|
| 586 |
+
# (B) Append this chunk's BODY then TAIL (target SR)
|
| 587 |
# ------------------------------------------
|
| 588 |
+
# BODY
|
| 589 |
+
y_body = to_target(body_m) if body_m.size else np.zeros((0, 2), dtype=np.float32)
|
| 590 |
+
if y_body.size:
|
| 591 |
+
self._spool = np.concatenate([self._spool, y_body], axis=0)
|
| 592 |
+
self._spool_written += int(y_body.shape[0])
|
| 593 |
+
print(f"[append] body len={y_body.shape[0] if y_body.size else 0} rms={_rms_dbfs(y_body):+.1f} dBFS")
|
| 594 |
+
|
| 595 |
+
# TAIL (we append now to keep continuity; on next call we'll correct the end)
|
| 596 |
+
y_tail = to_target(tail_m) if tail_m.size else np.zeros((0, 2), dtype=np.float32)
|
| 597 |
+
if y_tail.size:
|
| 598 |
+
self._spool = np.concatenate([self._spool, y_tail], axis=0)
|
| 599 |
+
self._spool_written += int(y_tail.shape[0])
|
| 600 |
+
self._pending_tail_target_len = int(y_tail.shape[0]) # how much we just added at target SR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
else:
|
| 602 |
+
# resampler returned nothing for the tail; mark 0 so next Lpop==0
|
| 603 |
self._pending_tail_target_len = 0
|
| 604 |
+
print(f"[append] tail len={y_tail.shape[0] if y_tail.size else 0} rms={_rms_dbfs(y_tail):+.1f} dBFS")
|
| 605 |
+
|
| 606 |
+
# keep THIS chunk's model tail to mix with next chunk's head
|
| 607 |
+
# (even if y_tail had 0 target samples; in that case we'll bridge by appending mixed overlap)
|
| 608 |
+
self._pending_tail_model = tail_m if tail_m.size else None
|
| 609 |
+
|
| 610 |
|
| 611 |
|
| 612 |
|