Create README.md
Browse files
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,32 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            license: mit
         | 
| 3 | 
            +
            datasets:
         | 
| 4 | 
            +
            - amaai-lab/DisfluencySpeech
         | 
| 5 | 
            +
            language:
         | 
| 6 | 
            +
            - en
         | 
| 7 | 
            +
            pipeline_tag: text-to-speech
         | 
| 8 | 
            +
            ---
         | 
| 9 | 
            +
            # Usage
         | 
| 10 | 
            +
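            The example below runs inference on a GPU (the inputs and the model are moved with `.cuda()`), so it needs `fairseq`, IPython, and a CUDA-enabled PyTorch installation.
         | 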
| 11 | 
            +
            ```python
         | 
| 12 | 
            +
            from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
         | 
| 13 | 
            +
            from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
         | 
| 14 | 
            +
            import IPython.display as ipd
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
         | 
| 17 | 
            +
                "amaai-lab/DisfluencySpeech_BenchmarkB",
         | 
| 18 | 
            +
                arg_overrides={"vocoder": "hifigan", "fp16": False, "spec-bwd-max-iter": 32}
         | 
| 19 | 
            +
            )
         | 
| 20 | 
            +
            model = models[0]
         | 
| 21 | 
            +
            TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
         | 
| 22 | 
            +
            generator = task.build_generator(models, cfg)
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            text = "Well, that's really funny, isn't it? What a strange world we live in."
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            sample = TTSHubInterface.get_model_input(task, text)
         | 
| 27 | 
            +
            sample['net_input']['src_tokens'] = sample['net_input']['src_tokens'].cuda()
         | 
| 28 | 
            +
            sample['net_input']['src_lengths'] = sample['net_input']['src_lengths'].cuda()
         | 
| 29 | 
            +
            wav, rate = TTSHubInterface.get_prediction(task, model.cuda(), generator, sample)
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            ipd.Audio(wav.cpu(), rate=rate)
         | 
| 32 | 
            +
            ```
         | 
