Spaces:

Sakalti
/

Onsei-Tukuri

Sleeping

App Files Files Community

Onsei-Tukuri / app.py

Sakalti

Update app.py

bc7b359 verified 7 months ago

raw

history blame contribute delete

4.82 kB

	import gradio as gr
	from gtts import gTTS
	from pydub import AudioSegment
	import tempfile
	import os
	import numpy as np

	# Voice presets shown in the template dropdown, keyed by display name.
	# "rate" is multiplied by the user's speed factor and divided by 100 in
	# generate_tts to obtain the playback-speed multiplier; "volume" is the
	# preset's loudness setting.
	TEMPLATES = {
	    preset_name: {"rate": preset_rate, "volume": preset_volume}
	    for preset_name, preset_rate, preset_volume in (
	        ("パラオ高め（ポーランドボール風）", 180, 1.0),
	        ("低めのナレーター", 120, 0.8),
	        ("普通の話し方", 150, 1.0),
	        ("元気な女の子", 180, 1.2),
	        ("落ち着いた男性", 130, 0.9),
	        ("ロボット風（機械的）", 140, 1.0),
	        ("さっぱりした女性", 160, 1.1),
	        ("しっとりした声", 140, 0.9),
	        ("おじさん風", 60, 0.75),
	        ("怒った声", 45, 0.9),
	    )
	}

	# Names of the post-processing effects selectable in the effect dropdown.
	EFFECTS = [
	    "なし",
	    "ふわふわ化",
	    "かちかち化",
	    "減衰",
	    "リバーブ",
	    "音揺れ",
	]

	def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
	    """Synthesize Japanese speech for ``text`` and return the path of the processed MP3.

	    Args:
	        text: Text to read aloud. Must contain at least one non-blank character.
	        template_name: Key into ``TEMPLATES``; unknown names fall back to
	            ``{"rate": 150, "volume": 1.0}``.
	        pitch_factor: Multiplier handed to ``change_pitch``.
	        speed_factor: Multiplier applied to the template's ``rate``. The
	            product divided by 100 is handed to ``change_speed`` (so rate 150
	            with factor 1.0 yields 1.5).
	        effect_type: Effect name handed to ``apply_effect``.
	        effect_strength: Strength handed to ``apply_effect``.

	    Returns:
	        Path of a temporary ``*_modified.mp3`` file holding the result. The
	        file is intentionally left on disk so the caller can serve it.

	    Raises:
	        gr.Error: If ``text`` is empty or only whitespace.
	    """
	    # Reject blank input up front instead of letting the synthesizer fail
	    # with an opaque error.
	    if not text or not text.strip():
	        raise gr.Error("読み上げるテキストを入力してください。")

	    # Apply the selected preset.
	    template = TEMPLATES.get(template_name, {"rate": 150, "volume": 1.0})
	    rate = template["rate"] * speed_factor
	    volume = template["volume"]

	    # Speech synthesis (gTTS).
	    tts = gTTS(text=text, lang='ja')

	    # Reserve a unique file name and close the handle before gTTS writes to
	    # the path, so the file is never open twice at the same time.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
	        tts_path = f.name
	    try:
	        tts.save(tts_path)
	        sound = AudioSegment.from_mp3(tts_path)
	    finally:
	        # The raw synthesis output is only an intermediate; remove it so
	        # temporary files do not accumulate with every request.
	        try:
	            os.remove(tts_path)
	        except OSError:
	            pass

	    # Pitch, then speed (rate is treated as a percentage), then the effect.
	    sound = change_pitch(sound, pitch_factor)
	    sound = change_speed(sound, rate / 100)
	    sound = apply_effect(sound, effect_type, effect_strength)

	    # Apply the preset volume, which was previously read but never used.
	    # The linear factor is converted to decibels for apply_gain.
	    if volume > 0 and volume != 1.0:
	        sound = sound.apply_gain(float(20 * np.log10(volume)))

	    # Write the result to its own unique temporary file rather than deriving
	    # the name with str.replace, which would rewrite every ".mp3" in the path.
	    with tempfile.NamedTemporaryFile(delete=False, suffix="_modified.mp3") as out:
	        output_path = out.name
	    sound.export(output_path, format="mp3")

	    return output_path

	def change_pitch(sound, factor):
	    """Return ``sound`` re-labelled at ``factor`` times its frame rate, converted to 44100 Hz.

	    The raw sample data is reused unchanged; only the frame rate attached to
	    it is scaled (truncated to an integer) before the segment is converted
	    to 44100 Hz with ``set_frame_rate``.

	    NOTE(review): this is the same operation ``change_speed`` performs, so
	    pitch and duration presumably change together - confirm this is intended.
	    """
	    scaled_rate = int(factor * sound.frame_rate)
	    relabelled = sound._spawn(
	        sound.raw_data,
	        overrides={"frame_rate": scaled_rate},
	    )
	    return relabelled.set_frame_rate(44100)

	def change_speed(sound, speed=1.0):
	    """Return ``sound`` re-labelled at ``speed`` times its frame rate, converted to 44100 Hz.

	    The raw sample data is reused unchanged; only the frame rate attached to
	    it is scaled (truncated to an integer) before the segment is converted
	    to 44100 Hz with ``set_frame_rate``.

	    NOTE(review): this is the same operation ``change_pitch`` performs, so
	    speed and pitch presumably change together - confirm this is intended.
	    """
	    scaled_rate = int(speed * sound.frame_rate)
	    frame_rate_override = {"frame_rate": scaled_rate}
	    return sound._spawn(sound.raw_data, overrides=frame_rate_override).set_frame_rate(44100)

	def apply_effect(sound, effect_type, effect_strength):
	    """Apply the named effect to ``sound`` and return the resulting segment.

	    Args:
	        sound: Audio segment to process.
	        effect_type: One of the effect names below; any other value returns
	            ``sound`` unchanged.
	        effect_strength: Scaling factor for the effect (may be a float).

	    Effects:
	        "ふわふわ化": low-pass filter with cutoff ``1000 * effect_strength``.
	        "かちかち化": high-pass filter with cutoff ``3000 * effect_strength``.
	        "減衰": fade-out over ``len(sound) * effect_strength`` ms, capped at
	            the length of the clip.
	        "リバーブ": the sound followed by a copy faded at both edges, the whole
	            reduced by ``10 * effect_strength`` dB.
	        "音揺れ": delegated to ``wobble``.
	    """
	    clip_ms = len(sound)

	    if effect_type == "ふわふわ化":
	        return sound.low_pass_filter(1000 * effect_strength)
	    elif effect_type == "かちかち化":
	        return sound.high_pass_filter(3000 * effect_strength)
	    elif effect_type == "減衰":
	        # Cap the fade at the clip length: a longer fade would start before
	        # the beginning of the audio. Skip zero-length fades entirely.
	        fade_ms = min(clip_ms, int(clip_ms * effect_strength))
	        if fade_ms <= 0:
	            return sound
	        return sound.fade_out(fade_ms)
	    elif effect_type == "リバーブ":
	        # Fade durations must be whole milliseconds; the strength is usually
	        # a float, so convert explicitly and cap at the clip length.
	        edge_ms = min(clip_ms, int(200 * effect_strength))
	        faded = sound.reverse()
	        if edge_ms > 0:
	            faded = faded.fade_in(edge_ms).fade_out(edge_ms)
	        # NOTE(review): "+" between two segments appends one after the other
	        # rather than mixing them, so the output plays the clip twice; an
	        # overlay with a delay may have been intended - confirm.
	        return (sound + faded.reverse()) - (10 * effect_strength)
	    elif effect_type == "音揺れ":
	        return wobble(sound, effect_strength)
	    else:
	        return sound

	def wobble(sound, strength):
	    """Return ``sound`` with each 100 ms slice pitch-shifted by a random ratio.

	    For every slice a ratio is drawn uniformly from
	    ``[1 - 0.05 * strength, 1 + 0.05 * strength)`` and passed to
	    ``change_pitch``; the shifted slices are concatenated in order. An empty
	    input yields ``AudioSegment.empty()``.
	    """
	    step_ms = 100
	    lowest = 1 - 0.05 * strength
	    highest = 1 + 0.05 * strength

	    result = AudioSegment.empty()
	    for offset in range(0, len(sound), step_ms):
	        piece = sound[offset:offset + step_ms]
	        ratio = np.random.uniform(lowest, highest)
	        result += change_pitch(piece, ratio)
	    return result

	# Build the Gradio interface: text box, preset and effect controls, a submit
	# button, and an audio player fed by generate_tts.
	with gr.Blocks() as app:
	    gr.Markdown("# オリジナル声読み上げ機")

	    # Text to be read aloud.
	    with gr.Row():
	        text_input = gr.Textbox(
	            label="読み上げるテキスト",
	            lines=2,
	            placeholder="ここに入力...",
	        )

	    # Voice preset selection.
	    with gr.Row():
	        template_dropdown = gr.Dropdown(
	            choices=list(TEMPLATES),
	            value="パラオ高め（ポーランドボール風）",
	            label="テンプレートを選ぶ",
	        )

	    # Pitch and speed multipliers.
	    with gr.Row():
	        pitch_slider = gr.Slider(
	            minimum=0.1,
	            maximum=5.0,
	            value=1.0,
	            step=0.05,
	            label="ピッチ倍率（高く・低く）",
	        )
	        speed_slider = gr.Slider(
	            minimum=0.1,
	            maximum=5.0,
	            value=1.0,
	            step=0.05,
	            label="速度倍率（速く・遅く）",
	        )

	    # Effect selection and its strength.
	    with gr.Row():
	        effect_dropdown = gr.Dropdown(
	            choices=EFFECTS,
	            value="なし",
	            label="エフェクトを選ぶ",
	        )
	        effect_strength_slider = gr.Slider(
	            minimum=0.1,
	            maximum=10.0,
	            value=1.0,
	            step=0.05,
	            label="エフェクト強さ",
	        )

	    with gr.Row():
	        submit_btn = gr.Button("生成する")

	    audio_output = gr.Audio(label="出力音声", type="filepath")

	    # The input order must match generate_tts's positional parameters.
	    submit_btn.click(
	        fn=generate_tts,
	        inputs=[
	            text_input,
	            template_dropdown,
	            pitch_slider,
	            speed_slider,
	            effect_dropdown,
	            effect_strength_slider,
	        ],
	        outputs=audio_output,
	    )

	app.launch()