Spaces:
Sleeping
Sleeping
import math
import os
import tempfile

import gradio as gr
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
# Voice presets selectable in the UI. "rate" is a speed percentage
# (generate_tts divides it by 100 before use); "volume" is the preset's
# volume setting.
TEMPLATES = {
    name: {"rate": rate, "volume": volume}
    for name, rate, volume in (
        ("パラオ高め(ポーランドボール風)", 180, 1.0),
        ("低めのナレーター", 120, 0.8),
        ("普通の話し方", 150, 1.0),
        ("元気な女の子", 180, 1.2),
        ("落ち着いた男性", 130, 0.9),
        ("ロボット風(機械的)", 140, 1.0),
        ("さっぱりした女性", 160, 1.1),
        ("しっとりした声", 140, 0.9),
        ("おじさん風", 60, 0.75),
        ("怒った声", 45, 0.9),
    )
}

# Effect names offered in the UI. apply_effect returns the audio unchanged
# for "なし" (and for any name it does not recognise).
EFFECTS = [
    "なし",
    "ふわふわ化",
    "かちかち化",
    "減衰",
    "リバーブ",
    "音揺れ",
]
def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
    """Synthesize Japanese speech for *text* and return the path of the processed MP3.

    The text is spoken with gTTS, then pitch, speed, the selected effect and
    the template's volume are applied with pydub.

    Args:
        text: Text to read aloud.
        template_name: Key into TEMPLATES. Unknown names fall back to
            rate 150 / volume 1.0.
        pitch_factor: Multiplier handed to change_pitch.
        speed_factor: Multiplier applied to the template's rate.
        effect_type: Effect name handed to apply_effect.
        effect_strength: Strength handed to apply_effect.

    Returns:
        Path of a temporary ``*_modified.mp3`` file containing the result.
        The file is not deleted here; the caller owns it.

    Raises:
        gr.Error: If *text* is empty or whitespace only.
    """
    # gTTS refuses empty input; surface a readable message in the UI instead.
    if not text or not text.strip():
        raise gr.Error("読み上げるテキストを入力してください。")

    # Resolve the preset.
    template = TEMPLATES.get(template_name, {"rate": 150, "volume": 1.0})
    rate = template["rate"] * speed_factor
    volume = template["volume"]

    # Reserve a unique path and close the handle before gTTS writes to it
    # (an open NamedTemporaryFile cannot be reopened on every platform).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        tts_path = f.name

    try:
        gTTS(text=text, lang='ja').save(tts_path)
        sound = AudioSegment.from_mp3(tts_path)
    finally:
        # The raw synthesis is only an intermediate; do not leave it behind.
        if os.path.exists(tts_path):
            os.remove(tts_path)

    sound = change_pitch(sound, pitch_factor)
    # The rate is a percentage, so 150 means 1.5x.
    sound = change_speed(sound, rate / 100)
    sound = apply_effect(sound, effect_type, effect_strength)

    # Apply the preset volume: convert the linear multiplier to decibels.
    if volume > 0 and volume != 1.0:
        sound = sound.apply_gain(20 * math.log10(volume))

    # Write the result to its own temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix="_modified.mp3") as out:
        output_path = out.name
    sound.export(output_path, format="mp3")
    return output_path
def change_pitch(sound, factor):
    """Return *sound* with its pitch scaled by *factor*, resampled to 44100 Hz.

    The raw samples are re-labelled with a frame rate of
    ``int(sound.frame_rate * factor)`` and the result is then converted to
    44100 Hz. Because the same samples are played at a different rate, the
    duration changes together with the pitch.

    Args:
        sound: pydub AudioSegment to transform.
        factor: Pitch multiplier; values above 1 raise the pitch.

    Returns:
        A new AudioSegment at 44100 Hz.

    Raises:
        ValueError: If *factor* yields a frame rate that is not positive.
    """
    new_frame_rate = int(sound.frame_rate * factor)
    if new_frame_rate <= 0:
        raise ValueError(
            f"factor {factor!r} gives an invalid frame rate ({new_frame_rate})"
        )
    pitched_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    return pitched_sound.set_frame_rate(44100)
def change_speed(sound, speed=1.0):
    """Return *sound* played back *speed* times faster, resampled to 44100 Hz.

    The raw samples are re-labelled with a frame rate of
    ``int(sound.frame_rate * speed)`` and the result is then converted to
    44100 Hz. Because the same samples are played at a different rate, the
    pitch changes together with the speed.

    Args:
        sound: pydub AudioSegment to transform.
        speed: Playback-speed multiplier; 1.0 keeps the original speed.

    Returns:
        A new AudioSegment at 44100 Hz.

    Raises:
        ValueError: If *speed* yields a frame rate that is not positive.
    """
    new_frame_rate = int(sound.frame_rate * speed)
    if new_frame_rate <= 0:
        raise ValueError(
            f"speed {speed!r} gives an invalid frame rate ({new_frame_rate})"
        )
    sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    return sped_up_sound.set_frame_rate(44100)
def apply_effect(sound, effect_type, effect_strength):
    """Apply the named effect to *sound* and return the result.

    Args:
        sound: pydub AudioSegment to process.
        effect_type: One of the names in EFFECTS. Any other value, including
            "なし", returns *sound* unchanged.
        effect_strength: Scales the effect: filter cutoff, fade length,
            attenuation, or wobble depth depending on the effect.

    Returns:
        The processed AudioSegment.
    """
    if effect_type == "ふわふわ化":
        # Softer sound: low-pass with the cutoff at 1000 Hz per unit of strength.
        return sound.low_pass_filter(1000 * effect_strength)

    if effect_type == "かちかち化":
        # Harder sound: high-pass with the cutoff at 3000 Hz per unit of strength.
        return sound.high_pass_filter(3000 * effect_strength)

    if effect_type == "減衰":
        # A fade cannot be longer than the clip, so cap it at the full length.
        fade_ms = min(len(sound), int(len(sound) * effect_strength))
        if fade_ms <= 0:
            return sound
        return sound.fade_out(fade_ms)

    if effect_type == "リバーブ":
        # Fade lengths are whole milliseconds and must fit inside the clip;
        # the strength usually arrives as a float from the UI slider.
        fade_ms = min(len(sound), int(200 * effect_strength))
        tail = sound.reverse()
        if fade_ms > 0:
            tail = tail.fade_in(fade_ms).fade_out(fade_ms)
        tail = tail.reverse()
        # NOTE(review): `+` appends the faded copy after the original rather
        # than mixing it in; kept as is. The result is then attenuated by
        # 10 dB per unit of strength.
        return (sound + tail) - (10 * effect_strength)

    if effect_type == "音揺れ":
        return wobble(sound, effect_strength)

    return sound
def wobble(sound, strength):
    """Return *sound* with a random pitch wobble applied in 100 ms slices.

    Each slice is passed through change_pitch with a factor drawn uniformly
    from ``[1 - 0.05 * strength, 1 + 0.05 * strength]``, and the slices are
    concatenated in order.

    Args:
        sound: pydub AudioSegment to process.
        strength: Scales the width of the random pitch range.

    Returns:
        The concatenation of the pitch-shifted slices.
    """
    slice_ms = 100
    spread = 0.05 * strength
    result = AudioSegment.empty()
    for offset in range(0, len(sound), slice_ms):
        # One independent random factor per slice.
        factor = np.random.uniform(1 - spread, 1 + spread)
        result += change_pitch(sound[offset:offset + slice_ms], factor)
    return result
# Gradio UI: text box, preset picker, pitch/speed sliders, effect controls,
# a generate button and the audio player for the result.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost; confirm row membership against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("# オリジナル声読み上げ機")

    with gr.Row():
        text_input = gr.Textbox(
            label="読み上げるテキスト",
            lines=2,
            placeholder="ここに入力...",
        )

    with gr.Row():
        template_dropdown = gr.Dropdown(
            choices=list(TEMPLATES),
            value="パラオ高め(ポーランドボール風)",
            label="テンプレートを選ぶ",
        )

    with gr.Row():
        pitch_slider = gr.Slider(
            0.1, 5.0, value=1.0, step=0.05, label="ピッチ倍率(高く・低く)"
        )
        speed_slider = gr.Slider(
            0.1, 5.0, value=1.0, step=0.05, label="速度倍率(速く・遅く)"
        )

    with gr.Row():
        effect_dropdown = gr.Dropdown(
            choices=EFFECTS, value="なし", label="エフェクトを選ぶ"
        )
        effect_strength_slider = gr.Slider(
            0.1, 10.0, value=1.0, step=0.05, label="エフェクト強さ"
        )

    with gr.Row():
        submit_btn = gr.Button("生成する")

    audio_output = gr.Audio(label="出力音声", type="filepath")

    # The input order must match generate_tts's positional parameters.
    submit_btn.click(
        fn=generate_tts,
        inputs=[
            text_input,
            template_dropdown,
            pitch_slider,
            speed_slider,
            effect_dropdown,
            effect_strength_slider,
        ],
        outputs=audio_output,
    )

app.launch()