Update app.py
Browse files
app.py
CHANGED
|
@@ -99,49 +99,34 @@ async def tts(ws: WebSocket):
|
|
| 99 |
text = req.get("text", "")
|
| 100 |
voice = req.get("voice", "Jakob")
|
| 101 |
|
| 102 |
-
ids, attn
|
| 103 |
-
past
|
| 104 |
-
offset_len = ids.size(1)
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
| 108 |
while True:
|
| 109 |
-
print(f"DEBUG: Before generate - past is None: {past is None}", flush=True) # Added logging
|
| 110 |
-
print(f"DEBUG: Before generate - type of past: {type(past) if past is not None else 'None'}", flush=True) # Added logging
|
| 111 |
-
# --- Mini‑Generate (StaticCache via cache_implementation) -------------------------------------------
|
| 112 |
gen = model.generate(
|
| 113 |
-
input_ids = ids if past is None else torch.tensor([[last_tok]], device=device),
|
| 114 |
-
attention_mask = attn if past is None else None,
|
| 115 |
-
past_key_values =
|
| 116 |
-
|
|
|
|
| 117 |
logits_processor=[masker],
|
| 118 |
do_sample=True, temperature=0.7, top_p=0.95,
|
| 119 |
-
use_cache=True,
|
| 120 |
-
|
| 121 |
-
#return_legacy_cache=True,
|
| 122 |
-
#cache_implementation="static" # Enabled StaticCache via implementation
|
| 123 |
)
|
| 124 |
-
print(f"DEBUG: After generate - type of gen.past_key_values: {type(gen.past_key_values)}", flush=True) # Added logging
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
new = seq[offset_len:]
|
| 129 |
-
if not new: # nichts -> fertig
|
| 130 |
break
|
| 131 |
-
offset_len += len(new)
|
| 132 |
-
|
| 133 |
-
# ----- Update past and last_tok (Cache Re-enabled) ---------
|
| 134 |
-
# ids = torch.tensor([seq], device=device) # Removed full sequence update
|
| 135 |
-
# attn = torch.ones_like(ids) # Removed full sequence update
|
| 136 |
-
#pkv = gen.past_key_values # Update past with the cache object returned by generate
|
| 137 |
-
print(f"DEBUG: After cache update - type of past: {type(past)}", flush=True) # Added logging
|
| 138 |
-
#if isinstance(pkv, StaticCache): pkv = pkv.to_legacy()
|
| 139 |
-
past = gen.past_key_values
|
| 140 |
-
print(f"DEBUG: After cache update - type of past: {type(past)}", flush=True) # Added logging
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
| 145 |
|
| 146 |
# ----- Token‑Handling ----------------------------------------
|
| 147 |
for t in new:
|
|
|
|
| 99 |
text = req.get("text", "")
|
| 100 |
voice = req.get("voice", "Jakob")
|
| 101 |
|
| 102 |
+
ids, attn = build_prompt(text, voice)
|
| 103 |
+
past = None
|
| 104 |
+
offset_len = ids.size(1)
|
| 105 |
+
cache_pos = offset_len - 1 # 0‑basiert
|
| 106 |
+
last_tok = None
|
| 107 |
+
buf = []
|
| 108 |
+
|
| 109 |
while True:
|
|
|
|
|
|
|
|
|
|
| 110 |
gen = model.generate(
|
| 111 |
+
input_ids = ids if past is None else torch.tensor([[last_tok]], device=device),
|
| 112 |
+
attention_mask = attn if past is None else None,
|
| 113 |
+
past_key_values = past,
|
| 114 |
+
cache_position = None if past is None else torch.tensor([cache_pos], device=device), # ← **NEU**
|
| 115 |
+
max_new_tokens = CHUNK_TOKENS,
|
| 116 |
logits_processor=[masker],
|
| 117 |
do_sample=True, temperature=0.7, top_p=0.95,
|
| 118 |
+
use_cache=True, return_dict_in_generate=True,
|
| 119 |
+
return_legacy_cache=False
|
|
|
|
|
|
|
| 120 |
)
|
|
|
|
| 121 |
|
| 122 |
+
new = gen.sequences[0, offset_len:].tolist()
|
| 123 |
+
if not new:
|
|
|
|
|
|
|
| 124 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
offset_len += len(new)
|
| 127 |
+
cache_pos = offset_len - 1 # ← **NEU**
|
| 128 |
+
past = gen.past_key_values
|
| 129 |
+
last_tok = new[-1]
|
| 130 |
|
| 131 |
# ----- Token‑Handling ----------------------------------------
|
| 132 |
for t in new:
|