dev-mode-orpheus

Paused

App Files Files Community

Tomtom84 committed on Apr 20

Commit

5d73119

verified ·

1 Parent(s): 5031731

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -11

app.py CHANGED Viewed

@@ -107,37 +107,44 @@ def decode_block(block7: list[int]) -> bytes:
 async def tts(ws: WebSocket):
     await ws.accept()
     try:
-        req = json.loads(await ws.receive_text())
         text  = req.get("text", "")
         voice = req.get("voice", "Jakob")
-        ids, attn = build_inputs(text, voice)
         past      = None
         buf       = []
         while True:
             out = model.generate(
-                input_ids       = ids if past is None else None,
                 attention_mask  = attn if past is None else None,
                 past_key_values = past,
                 max_new_tokens  = CHUNK_TOKENS,
-                logits_processor= [masker],        # ► dynamischer Masker
                 do_sample=True, temperature=0.7, top_p=0.95,
                 return_dict_in_generate=True,
                 use_cache=True,
             )
-            # Cache & neue Tokens extrahieren --------------------------------
             pkv = out.past_key_values
-            if isinstance(pkv, Cache):             # HF >= 4.47
                 pkv = pkv.to_legacy()
             past = pkv
             new = out.sequences[0, -out.num_generated_tokens :].tolist()
-            print("new tokens:", new[:20])          # Debug‑Ausgabe
-            # ----------------------------------------------------------------
             for t in new:
                 if t == EOS_TOKEN:
                     raise StopIteration
@@ -149,9 +156,9 @@ async def tts(ws: WebSocket):
                 if len(buf) == 7:
                     await ws.send_bytes(decode_block(buf))
                     buf.clear()
-                    masker.blocks_done += 1         # ► jetzt darf ggf. EOS
-            # nächsten generate‑Step nur noch mit Cache, keine neuen ids
             ids, attn = None, None
     except (StopIteration, WebSocketDisconnect):
@@ -165,7 +172,7 @@ async def tts(ws: WebSocket):
             try:
                 await ws.close()
             except RuntimeError:
-                pass   # Close‑Frame war schon raus
 # ── 6. Lokaler Start (uvicorn) ───────────────────────────────────────
 if __name__ == "__main__":

 async def tts(ws: WebSocket):
     await ws.accept()
     try:
+        req   = json.loads(await ws.receive_text())
         text  = req.get("text", "")
         voice = req.get("voice", "Jakob")
+        ids, attn = build_inputs(text, voice)   # vollständiger Prompt
         past      = None
+        last_tok  = None                       # <- NEU
         buf       = []
         while True:
             out = model.generate(
+                input_ids       = ids if past is None else torch.tensor([[last_tok]], device=device),
                 attention_mask  = attn if past is None else None,
                 past_key_values = past,
                 max_new_tokens  = CHUNK_TOKENS,
+                logits_processor= [masker],
                 do_sample=True, temperature=0.7, top_p=0.95,
                 return_dict_in_generate=True,
                 use_cache=True,
+                return_legacy_cache=True,       # <- Warnung unterdrücken
             )
+            # ----- Cache & neue Token --------------------------------------
             pkv = out.past_key_values
+            if isinstance(pkv, Cache):          # HF >= 4.47
                 pkv = pkv.to_legacy()
             past = pkv
             new = out.sequences[0, -out.num_generated_tokens :].tolist()
+            print("new tokens:", new[:20])      # Debug‑Print
+            if not new:                         # Safety – nichts erzeugt
+                raise StopIteration
+            # ----- Token‑Handling ------------------------------------------
             for t in new:
+                last_tok = t                   # speichern für nächste Runde
                 if t == EOS_TOKEN:
                     raise StopIteration
                 if len(buf) == 7:
                     await ws.send_bytes(decode_block(buf))
                     buf.clear()
+                    masker.blocks_done += 1     # nach 1. Block darf EOS
+            # ab nächster Runde nur 1 Token + Cache
             ids, attn = None, None
     except (StopIteration, WebSocketDisconnect):
             try:
                 await ws.close()
             except RuntimeError:
+                pass
 # ── 6. Lokaler Start (uvicorn) ───────────────────────────────────────
 if __name__ == "__main__":