Spaces:
Running
Running
Update inference2.py
Browse files: inference2.py (+10 -5)
inference2.py
CHANGED
|
@@ -235,14 +235,16 @@ def run_inference(
|
|
| 235 |
frame = frame[y1:y2, x1:x2]
|
| 236 |
full_frames.append(frame)
|
| 237 |
|
| 238 |
-
print
|
| 239 |
if not full_frames:
|
| 240 |
raise ValueError("No frames could be read from the input face file.")
|
| 241 |
|
| 242 |
temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
|
|
|
|
|
|
|
| 243 |
if not audio_path.endswith('.wav'):
|
| 244 |
print('Extracting raw audio...')
|
| 245 |
-
command = f'ffmpeg -y -i "{audio_path}" -
|
| 246 |
try:
|
| 247 |
subprocess.run(command, shell=True, check=True, capture_output=True)
|
| 248 |
audio_path = temp_audio_path
|
|
@@ -250,15 +252,18 @@ def run_inference(
|
|
| 250 |
print(f"FFmpeg error: {e.stderr.decode()}")
|
| 251 |
raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
|
| 252 |
else:
|
| 253 |
-
# Copy the wav file to temp if it's already wav to maintain consistency in naming
|
| 254 |
shutil.copy(audio_path, temp_audio_path)
|
| 255 |
audio_path = temp_audio_path
|
| 256 |
|
| 257 |
-
|
| 258 |
wav = audio.load_wav(audio_path, 16000)
|
| 259 |
-
# >>> CRUCIAL FIX: Explicitly cast to float32 for resampy/numba compatibility <<<
|
| 260 |
wav = wav.astype(np.float32)
|
| 261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
mel = audio.melspectrogram(wav)
|
| 263 |
print("Mel spectrogram shape:", mel.shape)
|
| 264 |
|
|
|
|
| 235 |
frame = frame[y1:y2, x1:x2]
|
| 236 |
full_frames.append(frame)
|
| 237 |
|
| 238 |
+
print("Number of frames available for inference:", len(full_frames))
|
| 239 |
if not full_frames:
|
| 240 |
raise ValueError("No frames could be read from the input face file.")
|
| 241 |
|
| 242 |
temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
|
| 243 |
+
|
| 244 |
+
# Updated FFmpeg command: force mono, 16-bit, 16kHz
|
| 245 |
if not audio_path.endswith('.wav'):
|
| 246 |
print('Extracting raw audio...')
|
| 247 |
+
command = f'ffmpeg -y -i "{audio_path}" -ac 1 -ar 16000 -sample_fmt s16 "{temp_audio_path}"'
|
| 248 |
try:
|
| 249 |
subprocess.run(command, shell=True, check=True, capture_output=True)
|
| 250 |
audio_path = temp_audio_path
|
|
|
|
| 252 |
print(f"FFmpeg error: {e.stderr.decode()}")
|
| 253 |
raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
|
| 254 |
else:
|
|
|
|
| 255 |
shutil.copy(audio_path, temp_audio_path)
|
| 256 |
audio_path = temp_audio_path
|
| 257 |
|
| 258 |
+
# Load WAV audio
|
| 259 |
wav = audio.load_wav(audio_path, 16000)
|
|
|
|
| 260 |
wav = wav.astype(np.float32)
|
| 261 |
|
| 262 |
+
# Check audio length
|
| 263 |
+
print(f"Extracted audio samples: {len(wav)}, duration: {len(wav)/16000:.2f} sec")
|
| 264 |
+
if len(wav) < 16000:
|
| 265 |
+
raise ValueError(f"Audio is too short after conversion: only {len(wav)} samples. Please upload a longer clip.")
|
| 266 |
+
|
| 267 |
mel = audio.melspectrogram(wav)
|
| 268 |
print("Mel spectrogram shape:", mel.shape)
|
| 269 |
|