Spaces:
Sleeping
Sleeping
File size: 4,824 Bytes
60b21a4 dc049ca 60b21a4 2ff9b99 758024b dc049ca 60b21a4 758024b 66f6922 bc7b359 60b21a4 758024b dc049ca f499833 dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b f499833 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 758024b dc049ca 60b21a4 758024b dc049ca f499833 758024b f499833 758024b bc7b359 758024b dc049ca 758024b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import os
import numpy as np
# Voice template settings: each preset name maps to a "rate" and a "volume".
TEMPLATES = {
    preset_name: {"rate": preset_rate, "volume": preset_volume}
    for preset_name, preset_rate, preset_volume in (
        ("パラオ高め(ポーランドボール風)", 180, 1.0),
        ("低めのナレーター", 120, 0.8),
        ("普通の話し方", 150, 1.0),
        ("元気な女の子", 180, 1.2),
        ("落ち着いた男性", 130, 0.9),
        ("ロボット風(機械的)", 140, 1.0),
        ("さっぱりした女性", 160, 1.1),
        ("しっとりした声", 140, 0.9),
        ("おじさん風", 60, 0.75),
        ("怒った声", 45, 0.9),
    )
}

# Effect names offered in the UI; the first entry means "no effect".
EFFECTS = [
    "なし",
    "ふわふわ化",
    "かちかち化",
    "減衰",
    "リバーブ",
    "音揺れ",
]
def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
    """Synthesize Japanese speech for ``text`` and return the processed MP3 path.

    The text is synthesized with gTTS, loaded with pydub, passed through
    ``change_pitch``, ``change_speed`` and ``apply_effect``, scaled by the
    template volume, and exported next to the temporary synthesis file.

    Args:
        text: Text to read aloud.
        template_name: Key into ``TEMPLATES``; unknown names fall back to
            ``{"rate": 150, "volume": 1.0}``.
        pitch_factor: Multiplier handed to ``change_pitch``.
        speed_factor: Multiplier applied to the template ``rate``.
        effect_type: One of the names in ``EFFECTS``.
        effect_strength: Strength handed to ``apply_effect``.

    Returns:
        Path of the exported ``*_modified.mp3`` file.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    # Fail early with a message the UI can display instead of an opaque gTTS
    # failure (message: "Please enter text to read aloud.").
    if not text or not text.strip():
        raise gr.Error("読み上げるテキストを入力してください。")

    # Apply the template settings.
    template = TEMPLATES.get(template_name, {"rate": 150, "volume": 1.0})
    rate = template["rate"] * speed_factor  # speed adjustment
    volume = template["volume"]  # linear volume factor

    # Speech synthesis (gTTS). Reserve a unique file name and close the handle
    # before gTTS writes to it, so the write does not hit an open file.
    tts = gTTS(text=text, lang='ja')
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        tts_path = f.name

    try:
        tts.save(tts_path)
        # Load the synthesized audio.
        sound = AudioSegment.from_mp3(tts_path)
    finally:
        # The raw synthesis file is only an intermediate; do not leak it.
        if os.path.exists(tts_path):
            os.remove(tts_path)

    # Pitch change.
    sound = change_pitch(sound, pitch_factor)

    # Speed change; the rate is treated as a percentage.
    sound = change_speed(sound, rate / 100)

    # Apply the selected effect.
    sound = apply_effect(sound, effect_type, effect_strength)

    # Apply the template volume, converting the linear factor to decibels.
    if volume > 0 and volume != 1.0:
        sound = sound.apply_gain(float(20 * np.log10(volume)))

    # Export beside the (now removed) synthesis file. splitext only touches
    # the real extension, unlike str.replace on the whole path.
    output_path = os.path.splitext(tts_path)[0] + "_modified.mp3"
    sound.export(output_path, format="mp3")

    return output_path
def change_pitch(sound, factor):
    """Return ``sound`` re-tagged at ``factor`` times its frame rate, resampled to 44100 Hz.

    The raw sample data is left untouched; only the declared frame rate is
    scaled (truncated to an integer) before the result is converted back to
    a fixed 44100 Hz frame rate.
    """
    overrides = {"frame_rate": int(sound.frame_rate * factor)}
    return sound._spawn(sound.raw_data, overrides=overrides).set_frame_rate(44100)
def change_speed(sound, speed=1.0):
    """Return ``sound`` re-tagged at ``speed`` times its frame rate, resampled to 44100 Hz.

    The same raw samples are spawned into a new segment whose frame rate is
    the original one multiplied by ``speed`` (truncated to an integer), and
    that segment is then converted to a 44100 Hz frame rate.
    """
    scaled_rate = int(sound.frame_rate * speed)
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": scaled_rate},
    )
    resampled = retimed.set_frame_rate(44100)
    return resampled
def apply_effect(sound, effect_type, effect_strength):
    """Apply the named effect to ``sound`` and return the resulting segment.

    Args:
        sound: Audio segment to process.
        effect_type: Effect name. Recognized values:
            "ふわふわ化" (low-pass filter at ``1000 * effect_strength``),
            "かちかち化" (high-pass filter at ``3000 * effect_strength``),
            "減衰" (fade-out over ``len(sound) * effect_strength`` ms),
            "リバーブ" (appends an end-faded copy, then lowers the gain),
            "音揺れ" (delegates to ``wobble``).
            Any other value returns ``sound`` unchanged.
        effect_strength: Numeric strength that scales the effect.

    Returns:
        The processed segment, or ``sound`` itself when no effect applies.
    """
    if effect_type == "ふわふわ化":
        return sound.low_pass_filter(1000 * effect_strength)

    if effect_type == "かちかち化":
        return sound.high_pass_filter(3000 * effect_strength)

    if effect_type == "減衰":
        # Fade lengths must be whole milliseconds and cannot usefully exceed
        # the clip, so clamp to the clip length (strength may be above 1).
        fade_ms = min(len(sound), int(len(sound) * effect_strength))
        # A zero-length fade is a no-op; skip the call rather than let it fail.
        return sound.fade_out(fade_ms) if fade_ms > 0 else sound

    if effect_type == "リバーブ":
        # Cast to int like the decay branch: a float duration (e.g. 200 * 1.5)
        # is not a valid fade length.
        fade_ms = min(len(sound), int(200 * effect_strength))
        tail = sound.reverse()
        if fade_ms > 0:
            tail = tail.fade_in(fade_ms).fade_out(fade_ms)
        # Original followed by the faded copy, reduced by 10 * strength.
        return (sound + tail.reverse()) - (10 * effect_strength)

    if effect_type == "音揺れ":
        return wobble(sound, effect_strength)

    return sound
def wobble(sound, strength):
    """Return ``sound`` rebuilt from 100 ms slices, each given a random pitch factor.

    Every slice gets its own factor drawn uniformly from
    ``[1 - 0.05 * strength, 1 + 0.05 * strength]`` and is passed through
    ``change_pitch`` before being appended to the result, so a larger
    ``strength`` widens the range of the random shift.
    """
    slice_ms = 100  # 0.1 second per slice
    result = AudioSegment.empty()
    for offset in range(0, len(sound), slice_ms):
        piece = sound[offset:offset + slice_ms]
        depth = 0.05 * strength
        factor = np.random.uniform(1 - depth, 1 + depth)
        result += change_pitch(piece, factor)
    return result
# Gradio UI: collects the text and voice settings, then calls generate_tts
# and plays back the file path it returns.
# NOTE(review): the original nesting was lost in extraction; the audio player
# is placed below the button row here — confirm against the intended layout.
with gr.Blocks() as app:
    gr.Markdown("# オリジナル声読み上げ機")

    with gr.Row():
        text_input = gr.Textbox(label="読み上げるテキスト", lines=2, placeholder="ここに入力...")

    with gr.Row():
        template_dropdown = gr.Dropdown(
            choices=list(TEMPLATES),
            value="パラオ高め(ポーランドボール風)",
            label="テンプレートを選ぶ",
        )

    with gr.Row():
        pitch_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="ピッチ倍率(高く・低く)")
        speed_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="速度倍率(速く・遅く)")

    with gr.Row():
        effect_dropdown = gr.Dropdown(choices=EFFECTS, value="なし", label="エフェクトを選ぶ")
        effect_strength_slider = gr.Slider(0.1, 10.0, value=1.0, step=0.05, label="エフェクト強さ")

    with gr.Row():
        submit_btn = gr.Button("生成する")

    audio_output = gr.Audio(label="出力音声", type="filepath")

    # The input order must match generate_tts's positional parameters.
    submit_btn.click(
        fn=generate_tts,
        inputs=[
            text_input,
            template_dropdown,
            pitch_slider,
            speed_slider,
            effect_dropdown,
            effect_strength_slider,
        ],
        outputs=audio_output,
    )

# The stray trailing "|" after this call was extraction residue and a syntax
# error; it has been removed.
app.launch()