Spaces:
Runtime error
Runtime error
modify fx norm
Browse files- inference.py +7 -3
inference.py
CHANGED
|
@@ -112,7 +112,7 @@ class MasteringStyleTransfer:
|
|
| 112 |
|
| 113 |
return all_results, min_loss_step
|
| 114 |
|
| 115 |
-
def preprocess_audio(self, audio, target_sample_rate=44100):
|
| 116 |
sample_rate, data = audio
|
| 117 |
|
| 118 |
# Normalize audio to -1 to 1 range
|
|
@@ -141,7 +141,7 @@ class MasteringStyleTransfer:
|
|
| 141 |
data = julius.resample_frac(torch.from_numpy(data), sample_rate, target_sample_rate).numpy()
|
| 142 |
|
| 143 |
# Apply fx normalization for input audio during mastering style transfer
|
| 144 |
-
if
|
| 145 |
data = self.fx_normalizer.normalize_audio(data, 'mixture')
|
| 146 |
|
| 147 |
# Convert to torch tensor
|
|
@@ -150,8 +150,12 @@ class MasteringStyleTransfer:
|
|
| 150 |
return data_tensor.to(self.device)
|
| 151 |
|
| 152 |
def process_audio(self, input_audio, reference_audio):
|
| 153 |
-
|
|
|
|
|
|
|
| 154 |
reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
|
|
|
|
|
|
|
| 155 |
|
| 156 |
reference_feature = self.get_reference_embedding(reference_tensor)
|
| 157 |
|
|
|
|
| 112 |
|
| 113 |
return all_results, min_loss_step
|
| 114 |
|
| 115 |
+
def preprocess_audio(self, audio, target_sample_rate=44100, normalize=False):
|
| 116 |
sample_rate, data = audio
|
| 117 |
|
| 118 |
# Normalize audio to -1 to 1 range
|
|
|
|
| 141 |
data = julius.resample_frac(torch.from_numpy(data), sample_rate, target_sample_rate).numpy()
|
| 142 |
|
| 143 |
# Apply fx normalization for input audio during mastering style transfer
|
| 144 |
+
if normalize:
|
| 145 |
data = self.fx_normalizer.normalize_audio(data, 'mixture')
|
| 146 |
|
| 147 |
# Convert to torch tensor
|
|
|
|
| 150 |
return data_tensor.to(self.device)
|
| 151 |
|
| 152 |
def process_audio(self, input_audio, reference_audio):
|
| 153 |
+
print(f"input: {input_audio}")
|
| 154 |
+
print(f"reference: {reference_audio}")
|
| 155 |
+
input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
|
| 156 |
reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
|
| 157 |
+
print(f"input_tensor: {input_tensor.shape}")
|
| 158 |
+
print(f"reference_tensor: {reference_tensor.shape}")
|
| 159 |
|
| 160 |
reference_feature = self.get_reference_embedding(reference_tensor)
|
| 161 |
|