GPT-SoVITS-ProPlus

Runtime error

App Files Community

XXXXRT666 committed on Jun 27

Commit

7bdf3c3

1 Parent(s): d2e713a

Fix

Browse files

Files changed (5) hide show

AR/models/structs.py +1 -1
AR/models/t2s_model_flash_attn.py +21 -15
inference_webui.py +1 -0
pre-requirements.txt +2 -1
requirements.txt +1 -2

AR/models/structs.py CHANGED Viewed

@@ -68,7 +68,7 @@ class T2SSession:
             self.xy_dec_ = torch.rand((bsz, 1, decoder.embedding_dim)).to(dtype)
             # EOS
-            self.completed = [False] * len(self.x)
             self.y_results: List[Tensor] = [None] * len(self.x)  # type: ignore
             self.xy_pos = decoder.embed(self.x, self.y, self.bert_feature)

             self.xy_dec_ = torch.rand((bsz, 1, decoder.embedding_dim)).to(dtype)
             # EOS
+            self.completed = torch.Tensor([False] * len(self.x)).bool().to(device)
             self.y_results: List[Tensor] = [None] * len(self.x)  # type: ignore
             self.xy_pos = decoder.embed(self.x, self.y, self.bert_feature)

AR/models/t2s_model_flash_attn.py CHANGED Viewed

@@ -245,7 +245,6 @@ class CUDAGraphRunner:
                                 **kwds,
                             )
-                        torch_profiler.start()
                         with torch_profiler.record("AR"):
                             if session.graph:
                                 session.xy_pos_.copy_(session.xy_pos)
@@ -275,22 +274,28 @@ class CUDAGraphRunner:
                             top_p=request.top_p,
                             repetition_penalty=request.repetition_penalty,
                             temperature=request.temperature,
-                            use_cuda_graph=False,
                             idx=idx,
                         )
                         session.y = torch.cat([session.y, samples], dim=1)
                     with torch_profiler.record("EOS"):
-                        EOS_mask = (samples[:, 0] == decoder.EOS) | (torch.argmax(logits, dim=-1) == decoder.EOS)
-                        EOS_indices: List[int] = torch.where(EOS_mask)[0].tolist()
-                        for i in EOS_indices:
-                            if not session.completed[i]:
-                                session.y_results[i] = session.y[i, session.y_len : -1]
-                                session.completed[i] = True
-                        if all(session.completed):
                             if session.y.size(1) == 0:
                                 session.y = torch.cat([session.y, torch.zeros_like(samples)], dim=1)
                                 tqdm.write("Bad Zero Prediction")
@@ -306,7 +311,7 @@ class CUDAGraphRunner:
                             and (session.y.size(1) - session.y_len) > request.early_stop_num
                         ):
                             for i in range(bsz):
-                                if not session.completed[i]:
                                     session.y_results[i] = session.y[i, session.y_len :]
                                     session.completed[i] = True
                             break
@@ -316,10 +321,11 @@ class CUDAGraphRunner:
                         session.xy_pos = decoder.ar_audio_position.forward(session.input_pos - session.x_lens, y_emb)
                     if idx == 2:
                         t1 = time.perf_counter()
-                    if idx == 51:
-                        torch_profiler.end()
             match session.device.type:
                 case "cuda":
@@ -331,7 +337,7 @@ class CUDAGraphRunner:
                 case "mtia":
                     torch.mtia.empty_cache()
             gc.collect()
             return session.y_results[: request.valid_length]
     def generate(self, request: T2SRequest):

                                 **kwds,
                             )
                         with torch_profiler.record("AR"):
                             if session.graph:
                                 session.xy_pos_.copy_(session.xy_pos)
                             top_p=request.top_p,
                             repetition_penalty=request.repetition_penalty,
                             temperature=request.temperature,
+                            use_cuda_graph=request.use_cuda_graph,
                             idx=idx,
                         )
                         session.y = torch.cat([session.y, samples], dim=1)
                     with torch_profiler.record("EOS"):
+                        argmax_token = torch.argmax(logits, dim=-1)
+                        sample_token = samples.squeeze(1)
+                        EOS_mask = (argmax_token == decoder.EOS) | (sample_token == decoder.EOS)
+                    with torch_profiler.record("EOS1"):
+                        newly_done_mask = EOS_mask & (~session.completed)
+                    with torch_profiler.record("EOS2"):
+                        newly_done_indices = newly_done_mask.nonzero()
+                    with torch_profiler.record("EOS3"):
+                        if newly_done_indices.numel() > 0:
+                            session.y_results[newly_done_indices[0]] = session.y[
+                                newly_done_indices[0], session.y_len : -1
+                            ].squeeze(0)
+                            session.completed[newly_done_indices] = True
+                    with torch_profiler.record("EOS4"):
+                        if torch.all(session.completed).item():
                             if session.y.size(1) == 0:
                                 session.y = torch.cat([session.y, torch.zeros_like(samples)], dim=1)
                                 tqdm.write("Bad Zero Prediction")
                             and (session.y.size(1) - session.y_len) > request.early_stop_num
                         ):
                             for i in range(bsz):
+                                if not session.completed[i].item():
                                     session.y_results[i] = session.y[i, session.y_len :]
                                     session.completed[i] = True
                             break
                         session.xy_pos = decoder.ar_audio_position.forward(session.input_pos - session.x_lens, y_emb)
                     if idx == 2:
+                        torch_profiler.start()
                         t1 = time.perf_counter()
+                    # if idx == 51:
+                    #     torch_profiler.end()
             match session.device.type:
                 case "cuda":
                 case "mtia":
                     torch.mtia.empty_cache()
             gc.collect()
+            torch_profiler.end()
             return session.y_results[: request.valid_length]
     def generate(self, request: T2SRequest):

inference_webui.py CHANGED Viewed

@@ -836,4 +836,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
     )

         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
+        server_port=1111,
     )

pre-requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- torch==2.5.1


1	+ torch==2.5.1
2	+ torchaudio

requirements.txt CHANGED Viewed

@@ -3,7 +3,6 @@ scipy>=1.11.3
 tensorboard==2.15.1
 librosa==0.9.2
 numba==0.56.4
-torchaudio
 pytorch-lightning>=2.4
 gradio==4.44.1
 gradio_client==1.3.0
@@ -36,4 +35,4 @@ nltk==3.8.1
 fast_langdetect==0.3.1
 split_lang==2.1.0
 ToJyutping==3.2.0
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp310-cp310-linux_x86_64.whl

 tensorboard==2.15.1
 librosa==0.9.2
 numba==0.56.4
 pytorch-lightning>=2.4
 gradio==4.44.1
 gradio_client==1.3.0
 fast_langdetect==0.3.1
 split_lang==2.1.0
 ToJyutping==3.2.0
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl