Spaces:

Sakalti
/

Onsei-Tukuri

Sleeping

App Files Community

Sakalti committed on Apr 26

Commit

758024b

verified ·

1 Parent(s): 6538721

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -50

app.py CHANGED Viewed

@@ -1,59 +1,111 @@
 import gradio as gr
-from gtts import gTTS
 from pydub import AudioSegment
 import tempfile
 # テンプレート設定
 TEMPLATES = {
-    "標準 (ピッチ1.0, 速度1.0)": (1.0, 1.0),
-    "パラオボール (高め, 少し速い)": (1.3, 1.2),
-    "ウサギモード (超高め, 超速い)": (2.0, 2.0),
-    "クマモード (低め, ゆっくり)": (0.8, 0.8),
-    "スローリーバード (普通ピッチ, めっちゃゆっくり)": (1.0, 0.5)
 }
-def change_pitch_speed(sound, pitch_factor=1.0, speed_factor=1.0):
-    new_sample_rate = int(sound.frame_rate * pitch_factor)
-    sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_sample_rate})
-    sound = sound.set_frame_rate(44100)
-    sound = sound.speedup(playback_speed=speed_factor)
-    return sound
-def read_text(text, template_name, custom_pitch, custom_speed, use_custom):
-    if use_custom:
-        pitch = custom_pitch
-        speed = custom_speed
     else:
-        pitch, speed = TEMPLATES.get(template_name, (1.0, 1.0))
-    # gTTSで音声合成
-    tts = gTTS(text=text, lang='ja')
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
-        tts.save(fp.name)
-        tts_filename = fp.name
-    sound = AudioSegment.from_file(tts_filename)
-    modified_sound = change_pitch_speed(sound, pitch_factor=pitch, speed_factor=speed)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_fp:
-        output_filename = out_fp.name
-    modified_sound.export(output_filename, format="mp3")
-    return output_filename
-# Gradioインターフェース
-iface = gr.Interface(
-    fn=read_text,
-    inputs=[
-        gr.Textbox(label="読み上げるテキスト"),
-        gr.Dropdown(choices=list(TEMPLATES.keys()), value="標準 (ピッチ1.0, 速度1.0)", label="テンプレートを選択"),
-        gr.Slider(minimum=0.5, maximum=3.0, step=0.05, value=1.0, label="カスタムピッチ（使う場合のみ）"),
-        gr.Slider(minimum=0.5, maximum=3.0, step=0.05, value=1.0, label="カスタム速度（使う場合のみ）"),
-        gr.Checkbox(label="カスタム設定を使う（オンなら上のスライダー反映）", value=False)
-    ],
-    outputs=gr.Audio(label="生成された音声"),
-    title="パラオボール声 読み上げ機 gTTS版",
-    description="gTTSを使ったシンプル読み上げ機。ピッチと速度を自由に変えられます！"
-)
-iface.launch()

 import gradio as gr
+import pyttsx3
 from pydub import AudioSegment
+import numpy as np
 import tempfile
+import os
 # テンプレート設定
 TEMPLATES = {
+    "パラオ高め（ポーランドボール風）": {"rate": 180, "volume": 1.0},
+    "低めのナレーター": {"rate": 120, "volume": 0.8},
+    "普通の話し方": {"rate": 150, "volume": 1.0},
 }
+EFFECTS = ["なし", "ふわふわ化", "かちかち化", "減衰", "リバーブ", "音揺れ"]
+def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし"):
+    # 音声合成
+    engine = pyttsx3.init()
+    template = TEMPLATES[template_name]
+    engine.setProperty('rate', template["rate"])
+    engine.setProperty('volume', template["volume"])
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        tts_path = f.name
+    engine.save_to_file(text, tts_path)
+    engine.runAndWait()
+    # 音声読み込み
+    sound = AudioSegment.from_file(tts_path)
+    # ピッチ変更
+    sound = change_pitch(sound, pitch_factor)
+    # 速度変更
+    sound = change_speed(sound, speed_factor)
+    # エフェクト適用
+    sound = apply_effect(sound, effect_type)
+    # 一時ファイルに保存
+    output_path = tts_path.replace(".wav", "_modified.wav")
+    sound.export(output_path, format="wav")
+    return output_path
+def change_pitch(sound, factor):
+    new_frame_rate = int(sound.frame_rate * factor)
+    pitched_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
+    return pitched_sound.set_frame_rate(44100)
+def change_speed(sound, speed=1.0):
+    new_frame_rate = int(sound.frame_rate * speed)
+    sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
+    return sped_up_sound.set_frame_rate(44100)
+def apply_effect(sound, effect_type):
+    if effect_type == "ふわふわ化":
+        return sound.low_pass_filter(1000)
+    elif effect_type == "かちかち化":
+        return sound.high_pass_filter(3000)
+    elif effect_type == "減衰":
+        return sound.fade_out(len(sound))
+    elif effect_type == "リバーブ":
+        reversed_sound = sound.reverse()
+        faded = reversed_sound.fade_in(200).fade_out(200)
+        return (sound + faded.reverse()) - 10
+    elif effect_type == "音揺れ":
+        return wobble(sound)
     else:
+        return sound
+def wobble(sound):
+    # 0.2秒ごとにランダムにピッチを揺らす
+    chunk_ms = 200
+    chunks = [sound[i:i+chunk_ms] for i in range(0, len(sound), chunk_ms)]
+    wobbled = AudioSegment.empty()
+    for chunk in chunks:
+        pitch_shift = np.random.uniform(0.97, 1.03)  # ちょっと揺れる
+        chunk = change_pitch(chunk, pitch_shift)
+        wobbled += chunk
+    return wobbled
+with gr.Blocks() as app:
+    gr.Markdown("# オリジナル声読み上げ機")
+    with gr.Row():
+        text_input = gr.Textbox(label="読み上げるテキスト", lines=2, placeholder="ここに入力...")
+    with gr.Row():
+        template_dropdown = gr.Dropdown(choices=list(TEMPLATES.keys()), value="パラオ高め（ポーランドボール風）", label="テンプレートを選ぶ")
+    with gr.Row():
+        pitch_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="ピッチ倍率（高く・低く）")
+        speed_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="速度倍率（速く・遅く）")
+    with gr.Row():
+        effect_dropdown = gr.Dropdown(choices=EFFECTS, value="なし", label="エフェクトを選ぶ")
+    with gr.Row():
+        submit_btn = gr.Button("生成する")
+    audio_output = gr.Audio(label="出力音声", type="filepath")
+    submit_btn.click(
+        fn=generate_tts,
+        inputs=[text_input, template_dropdown, pitch_slider, speed_slider, effect_dropdown],
+        outputs=audio_output
+    )
+app.launch()