Sakalti committed on
Commit
758024b
·
verified ·
1 Parent(s): 6538721

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -50
app.py CHANGED
@@ -1,59 +1,111 @@
1
  import gradio as gr
2
- from gtts import gTTS
3
  from pydub import AudioSegment
 
4
  import tempfile
 
5
 
6
# Voice presets: display label -> (pitch factor, speed factor).
_PRESET_ROWS = (
    ("標準 (ピッチ1.0, 速度1.0)", (1.0, 1.0)),
    ("パラオボール (高め, 少し速い)", (1.3, 1.2)),
    ("ウサギモード (超高め, 超速い)", (2.0, 2.0)),
    ("クマモード (低め, ゆっくり)", (0.8, 0.8)),
    ("スローリーバード (普通ピッチ, めっちゃゆっくり)", (1.0, 0.5)),
)
# Insertion order is kept, so the dropdown lists presets in the order above.
TEMPLATES = dict(_PRESET_ROWS)
14
 
15
def change_pitch_speed(sound, pitch_factor=1.0, speed_factor=1.0):
    """Return ``sound`` with its pitch and playback speed adjusted.

    Args:
        sound: pydub ``AudioSegment`` to transform.
        pitch_factor: Multiplier applied to the frame rate. Values above 1.0
            raise the pitch, values below 1.0 lower it.
        speed_factor: Playback-speed multiplier applied after the pitch shift.

    Returns:
        The transformed segment, resampled to 44100 Hz.

    Raises:
        ValueError: If a factor is not positive, or is so small that the
            derived frame rate rounds down to zero.
    """
    if pitch_factor <= 0 or speed_factor <= 0:
        raise ValueError("pitch_factor and speed_factor must be greater than 0")

    shifted = _replay_at_rate(sound, pitch_factor)

    if speed_factor > 1.0:
        # pydub's speedup effect is built for factors above 1.0, so it is
        # only used for genuine speed-ups.
        return shifted.speedup(playback_speed=speed_factor)
    if speed_factor < 1.0:
        # No pitch-preserving slow-down is available here; replaying the
        # samples at a lower rate slows the clip but also lowers its pitch.
        return _replay_at_rate(shifted, speed_factor)
    # A factor of exactly 1.0 needs no speed processing at all.
    return shifted


def _replay_at_rate(sound, factor):
    """Reinterpret the raw samples at ``frame_rate * factor``, then resample to 44100 Hz."""
    new_sample_rate = int(sound.frame_rate * factor)
    if new_sample_rate <= 0:
        raise ValueError("factor is too small for this frame rate")
    respawned = sound._spawn(sound.raw_data, overrides={"frame_rate": new_sample_rate})
    return respawned.set_frame_rate(44100)
21
-
22
def read_text(text, template_name, custom_pitch, custom_speed, use_custom):
    """Synthesize ``text`` with gTTS and return the path of the processed MP3.

    Args:
        text: Text to read aloud (synthesized with ``lang='ja'``).
        template_name: Key into ``TEMPLATES``; unknown names fall back to
            pitch 1.0 / speed 1.0.
        custom_pitch: Pitch factor used when ``use_custom`` is true.
        custom_speed: Speed factor used when ``use_custom`` is true.
        use_custom: When true, the custom factors override the template.

    Returns:
        Path of a temporary ``.mp3`` file holding the modified audio. The
        file is left on disk for the caller to serve.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    import os  # local import: needed only for temp-file cleanup

    if not text or not text.strip():
        raise gr.Error("読み上げるテキストを入力してください。")

    if use_custom:
        pitch, speed = custom_pitch, custom_speed
    else:
        pitch, speed = TEMPLATES.get(template_name, (1.0, 1.0))

    # Reserve a path and close the handle before gTTS writes to it by name;
    # writing to a still-open temp file fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        tts_filename = fp.name

    try:
        gTTS(text=text, lang='ja').save(tts_filename)
        sound = AudioSegment.from_file(tts_filename)
    finally:
        # The raw synthesis is only an intermediate; remove it so repeated
        # requests do not pile up files in the temp directory.
        try:
            os.remove(tts_filename)
        except OSError:
            pass

    modified_sound = change_pitch_speed(sound, pitch_factor=pitch, speed_factor=speed)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_fp:
        output_filename = out_fp.name
    modified_sound.export(output_filename, format="mp3")

    return output_filename
43
-
44
# Gradio interface: the input widgets are listed in the same order as the
# parameters of read_text.
_text_box = gr.Textbox(label="読み上げるテキスト")
_template_menu = gr.Dropdown(
    choices=list(TEMPLATES),
    value="標準 (ピッチ1.0, 速度1.0)",
    label="テンプレートを選択",
)
_pitch_slider = gr.Slider(
    minimum=0.5, maximum=3.0, step=0.05, value=1.0,
    label="カスタムピッチ(使う場合のみ)",
)
_speed_slider = gr.Slider(
    minimum=0.5, maximum=3.0, step=0.05, value=1.0,
    label="カスタム速度(使う場合のみ)",
)
_custom_toggle = gr.Checkbox(
    label="カスタム設定を使う(オンなら上のスライダー反映)",
    value=False,
)

iface = gr.Interface(
    fn=read_text,
    inputs=[_text_box, _template_menu, _pitch_slider, _speed_slider, _custom_toggle],
    outputs=gr.Audio(label="生成された音声"),
    title="パラオボール声 読み上げ機 gTTS版",
    description="gTTSを使ったシンプル読み上げ機。ピッチと速度を自由に変えられます!",
)

iface.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pyttsx3
3
  from pydub import AudioSegment
4
+ import numpy as np
5
  import tempfile
6
+ import os
7
 
8
# Voice presets: display label -> engine properties ("rate" and "volume").
TEMPLATES = {
    "パラオ高め(ポーランドボール風)": dict(rate=180, volume=1.0),
    "低めのナレーター": dict(rate=120, volume=0.8),
    "普通の話し方": dict(rate=150, volume=1.0),
}

# Post-processing effects offered in the UI, in display order.
EFFECTS = [
    "なし",
    "ふわふわ化",
    "かちかち化",
    "減衰",
    "リバーブ",
    "音揺れ",
]
16
+
17
def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし"):
    """Synthesize ``text`` with pyttsx3, post-process it, and return a WAV path.

    Args:
        text: Text to read aloud.
        template_name: Key into ``TEMPLATES`` selecting the engine's
            ``rate`` and ``volume``.
        pitch_factor: Factor passed to ``change_pitch``.
        speed_factor: Factor passed to ``change_speed``.
        effect_type: Effect label passed to ``apply_effect``.

    Returns:
        Path of a temporary ``*_modified.wav`` file with the processed audio.
        The file is left on disk for the caller to serve.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
        KeyError: If ``template_name`` is not a key of ``TEMPLATES``.
    """
    if not text or not text.strip():
        raise gr.Error("読み上げるテキストを入力してください。")

    # Look the preset up before touching the speech engine so a bad name
    # fails without initializing it.
    template = TEMPLATES[template_name]

    engine = pyttsx3.init()
    engine.setProperty('rate', template["rate"])
    engine.setProperty('volume', template["volume"])

    # Reserve a unique path; the engine writes to it by name afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        tts_path = f.name
    # Swap only the extension; str.replace would also rewrite a ".wav" that
    # happened to appear earlier in the path.
    output_path = os.path.splitext(tts_path)[0] + "_modified.wav"

    try:
        engine.save_to_file(text, tts_path)
        engine.runAndWait()
        sound = AudioSegment.from_file(tts_path)
    finally:
        # The raw synthesis is only an intermediate; remove it so repeated
        # requests do not pile up files in the temp directory.
        try:
            os.remove(tts_path)
        except OSError:
            pass

    sound = change_pitch(sound, pitch_factor)
    sound = change_speed(sound, speed_factor)
    sound = apply_effect(sound, effect_type)

    sound.export(output_path, format="wav")
    return output_path
46
+
47
def change_pitch(sound, factor):
    """Shift the pitch of ``sound`` by replaying its samples at a scaled rate.

    The raw samples are reinterpreted at ``frame_rate * factor`` and the
    result is resampled to 44100 Hz. Because the same samples are played at a
    different rate, the clip's duration changes together with its pitch.

    Args:
        sound: pydub ``AudioSegment`` to transform.
        factor: Frame-rate multiplier; above 1.0 raises the pitch, below 1.0
            lowers it.

    Returns:
        The pitch-shifted segment at 44100 Hz.

    Raises:
        ValueError: If the scaled frame rate is not at least 1 Hz.
    """
    new_frame_rate = int(sound.frame_rate * factor)
    if new_frame_rate <= 0:
        # A zero or negative rate cannot describe playable audio.
        raise ValueError("factor must yield a frame rate of at least 1 Hz")
    pitched_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    return pitched_sound.set_frame_rate(44100)
51
+
52
def change_speed(sound, speed=1.0):
    """Change the playback speed of ``sound`` by replaying it at a scaled rate.

    The raw samples are reinterpreted at ``frame_rate * speed`` and the result
    is resampled to 44100 Hz. This is the same frame-rate technique used for
    pitch shifting, so the pitch moves together with the speed.

    Args:
        sound: pydub ``AudioSegment`` to transform.
        speed: Frame-rate multiplier; above 1.0 is faster, below 1.0 slower.

    Returns:
        The re-timed segment at 44100 Hz.

    Raises:
        ValueError: If the scaled frame rate is not at least 1 Hz.
    """
    new_frame_rate = int(sound.frame_rate * speed)
    if new_frame_rate <= 0:
        # A zero or negative rate cannot describe playable audio.
        raise ValueError("speed must yield a frame rate of at least 1 Hz")
    sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    return sped_up_sound.set_frame_rate(44100)
56
+
57
def apply_effect(sound, effect_type):
    """Apply the named post-processing effect to ``sound``.

    Args:
        sound: pydub ``AudioSegment`` to process.
        effect_type: One of the labels in ``EFFECTS``. Any other value
            (including "なし") returns ``sound`` unchanged.

    Returns:
        The processed segment.
    """
    if effect_type == "ふわふわ化":
        # Soften: keep only the content below 1 kHz.
        return sound.low_pass_filter(1000)
    if effect_type == "かちかち化":
        # Harden: keep only the content above 3 kHz.
        return sound.high_pass_filter(3000)
    if effect_type == "減衰":
        # Fade out across the whole clip.
        return sound.fade_out(len(sound))
    if effect_type == "リバーブ":
        return _reverb(sound)
    if effect_type == "音揺れ":
        return wobble(sound)
    return sound


def _reverb(sound, delay_ms=80, echoes=3, decay_db=6):
    """Mix progressively quieter, delayed copies of ``sound`` over itself.

    ``+`` on two segments appends one after the other, so adding a copy would
    simply play the clip twice; the echoes must be overlaid instead.
    """
    # Lower the dry signal first to leave headroom for the summed echoes.
    dry = sound - 3
    # overlay() never extends the base segment, so pad it with silence to
    # make room for the decaying tail.
    tail = AudioSegment.silent(duration=delay_ms * echoes, frame_rate=sound.frame_rate)
    mixed = dry + tail
    for n in range(1, echoes + 1):
        mixed = mixed.overlay(dry - decay_db * n, position=delay_ms * n)
    return mixed
72
+
73
def wobble(sound, chunk_ms=200, depth=0.03):
    """Randomly nudge the pitch of ``sound`` chunk by chunk.

    The clip is cut into ``chunk_ms``-millisecond slices, each slice is
    pitch-shifted by a random factor drawn uniformly from
    ``[1 - depth, 1 + depth]``, and the slices are joined back in order.

    Args:
        sound: pydub ``AudioSegment`` to process.
        chunk_ms: Slice length in milliseconds (default 200, i.e. 0.2 s).
        depth: Maximum relative pitch deviation per slice (default 0.03).

    Returns:
        The re-assembled segment; empty when ``sound`` is empty.

    Raises:
        ValueError: If ``chunk_ms`` is not positive or ``depth`` is negative.
    """
    if chunk_ms <= 0:
        raise ValueError("chunk_ms must be greater than 0")
    if depth < 0:
        raise ValueError("depth must not be negative")

    wobbled = AudioSegment.empty()
    for start in range(0, len(sound), chunk_ms):
        # A small random factor per slice gives a slight wavering effect.
        pitch_shift = np.random.uniform(1.0 - depth, 1.0 + depth)
        wobbled += change_pitch(sound[start:start + chunk_ms], pitch_shift)
    return wobbled
83
+
84
# Blocks layout: one row per control group, then the output player. The
# button feeds the widgets to generate_tts in the order of its parameters.
with gr.Blocks() as app:
    gr.Markdown(value="# オリジナル声読み上げ機")

    with gr.Row():
        text_input = gr.Textbox(
            label="読み上げるテキスト",
            lines=2,
            placeholder="ここに入力...",
        )

    with gr.Row():
        template_dropdown = gr.Dropdown(
            choices=list(TEMPLATES),
            value="パラオ高め(ポーランドボール風)",
            label="テンプレートを選ぶ",
        )

    with gr.Row():
        pitch_slider = gr.Slider(
            minimum=0.1, maximum=5.0, value=1.0, step=0.05,
            label="ピッチ倍率(高く・低く)",
        )
        speed_slider = gr.Slider(
            minimum=0.1, maximum=5.0, value=1.0, step=0.05,
            label="速度倍率(速く・遅く)",
        )

    with gr.Row():
        effect_dropdown = gr.Dropdown(
            choices=EFFECTS,
            value="なし",
            label="エフェクトを選ぶ",
        )

    with gr.Row():
        submit_btn = gr.Button(value="生成する")

    audio_output = gr.Audio(label="出力音声", type="filepath")

    _tts_inputs = [
        text_input,
        template_dropdown,
        pitch_slider,
        speed_slider,
        effect_dropdown,
    ]
    submit_btn.click(fn=generate_tts, inputs=_tts_inputs, outputs=audio_output)

app.launch()