Spaces:
Running
Running
update
Browse files
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import random
|
| 3 |
import time
|
| 4 |
|
| 5 |
from huggingface_hub import hf_hub_download
|
|
@@ -40,22 +39,10 @@ def mimi_streaming_test(input_wave, max_duration_sec=10.0):
|
|
| 40 |
sample_pcm = sample_pcm[None].to(device=device)
|
| 41 |
|
| 42 |
print("streaming encoding...")
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
| 47 |
-
for start_idx in range(0, sample_pcm.shape[-1], pcm_chunk_size):
|
| 48 |
-
end_idx = min(sample_pcm.shape[-1], start_idx + pcm_chunk_size)
|
| 49 |
-
chunk = sample_pcm[..., start_idx:end_idx]
|
| 50 |
-
with torch.no_grad():
|
| 51 |
-
codes = mimi.encode(chunk)
|
| 52 |
-
if codes.shape[-1]:
|
| 53 |
-
print(start_idx, codes.shape, end="\r")
|
| 54 |
-
all_codes.append(codes)
|
| 55 |
-
|
| 56 |
-
run_loop()
|
| 57 |
-
all_codes_th = torch.cat(all_codes, dim=-1)
|
| 58 |
-
print(f"codes {all_codes_th.shape} generated in {time.time() - start_time:.2f}s")
|
| 59 |
|
| 60 |
all_codes_list = [all_codes_th[:, :1, :],
|
| 61 |
all_codes_th[:, :2, :],
|
|
@@ -82,7 +69,7 @@ demo = gr.Interface(
|
|
| 82 |
# gr.Audio(type="numpy", label="With 8 codebooks"),
|
| 83 |
# gr.Audio(type="numpy", label="With 16 codebooks"),
|
| 84 |
gr.Audio(type="numpy", label="With 32 codebooks")],
|
| 85 |
-
examples= [["hello.mp3"]],
|
| 86 |
title="Mimi tokenizer playground",
|
| 87 |
description="Explore the quality of compression when using various number of code books in the Mimi model."
|
| 88 |
)
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
import time
|
| 3 |
|
| 4 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 39 |
sample_pcm = sample_pcm[None].to(device=device)
|
| 40 |
|
| 41 |
print("streaming encoding...")
|
| 42 |
+
with torch.no_grad():
|
| 43 |
+
all_codes_th = mimi.encode(sample_pcm)
|
| 44 |
|
| 45 |
+
print(f"codes {all_codes_th.shape}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
all_codes_list = [all_codes_th[:, :1, :],
|
| 48 |
all_codes_th[:, :2, :],
|
|
|
|
| 69 |
# gr.Audio(type="numpy", label="With 8 codebooks"),
|
| 70 |
# gr.Audio(type="numpy", label="With 16 codebooks"),
|
| 71 |
gr.Audio(type="numpy", label="With 32 codebooks")],
|
| 72 |
+
examples= [["./hello.mp3"]],
|
| 73 |
title="Mimi tokenizer playground",
|
| 74 |
description="Explore the quality of compression when using various number of code books in the Mimi model."
|
| 75 |
)
|