Sakalti committed on
Commit
dc049ca
·
verified ·
1 Parent(s): 66f6922

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -25
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
- import pyttsx3
3
  from pydub import AudioSegment
4
- import numpy as np
5
  import tempfile
6
  import os
 
7
 
8
  # テンプレート設定
9
  TEMPLATES = {
@@ -21,20 +21,16 @@ TEMPLATES = {
21
 
22
  EFFECTS = ["なし", "ふわふわ化", "かちかち化", "減衰", "リバーブ", "音揺れ"]
23
 
24
- def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし"):
25
- # 音声合成
26
- engine = pyttsx3.init()
27
- template = TEMPLATES[template_name]
28
- engine.setProperty('rate', template["rate"])
29
- engine.setProperty('volume', template["volume"])
30
 
31
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
32
  tts_path = f.name
33
- engine.save_to_file(text, tts_path)
34
- engine.runAndWait()
35
 
36
  # 音声読み込み
37
- sound = AudioSegment.from_file(tts_path)
38
 
39
  # ピッチ変更
40
  sound = change_pitch(sound, pitch_factor)
@@ -43,11 +39,11 @@ def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect
43
  sound = change_speed(sound, speed_factor)
44
 
45
  # エフェクト適用
46
- sound = apply_effect(sound, effect_type)
47
 
48
  # 一時ファイルに保存
49
- output_path = tts_path.replace(".wav", "_modified.wav")
50
- sound.export(output_path, format="wav")
51
 
52
  return output_path
53
 
@@ -61,29 +57,29 @@ def change_speed(sound, speed=1.0):
61
  sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
62
  return sped_up_sound.set_frame_rate(44100)
63
 
64
- def apply_effect(sound, effect_type):
65
  if effect_type == "ふわふわ化":
66
- return sound.low_pass_filter(1000)
67
  elif effect_type == "かちかち化":
68
- return sound.high_pass_filter(3000)
69
  elif effect_type == "減衰":
70
- return sound.fade_out(len(sound))
71
  elif effect_type == "リバーブ":
72
  reversed_sound = sound.reverse()
73
- faded = reversed_sound.fade_in(200).fade_out(200)
74
- return (sound + faded.reverse()) - 10
75
  elif effect_type == "音揺れ":
76
- return wobble(sound)
77
  else:
78
  return sound
79
 
80
- def wobble(sound):
81
  # 0.2秒ごとにランダムにピッチを揺らす
82
  chunk_ms = 200
83
  chunks = [sound[i:i+chunk_ms] for i in range(0, len(sound), chunk_ms)]
84
  wobbled = AudioSegment.empty()
85
  for chunk in chunks:
86
- pitch_shift = np.random.uniform(0.97, 1.03) # ちょっと揺れる
87
  chunk = change_pitch(chunk, pitch_shift)
88
  wobbled += chunk
89
  return wobbled
@@ -103,6 +99,7 @@ with gr.Blocks() as app:
103
 
104
  with gr.Row():
105
  effect_dropdown = gr.Dropdown(choices=EFFECTS, value="なし", label="エフェクトを選ぶ")
 
106
 
107
  with gr.Row():
108
  submit_btn = gr.Button("生成する")
@@ -111,7 +108,7 @@ with gr.Blocks() as app:
111
 
112
  submit_btn.click(
113
  fn=generate_tts,
114
- inputs=[text_input, template_dropdown, pitch_slider, speed_slider, effect_dropdown],
115
  outputs=audio_output
116
  )
117
 
 
1
  import gradio as gr
2
+ from gtts import gTTS
3
  from pydub import AudioSegment
 
4
  import tempfile
5
  import os
6
+ import numpy as np
7
 
8
  # テンプレート設定
9
  TEMPLATES = {
 
21
 
22
  EFFECTS = ["なし", "ふわふわ化", "かちかち化", "減衰", "リバーブ", "音揺れ"]
23
 
24
+ def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
25
+ # 音声合成(Gtts使用)
26
+ tts = gTTS(text=text, lang='ja')
 
 
 
27
 
28
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
29
  tts_path = f.name
30
+ tts.save(tts_path)
 
31
 
32
  # 音声読み込み
33
+ sound = AudioSegment.from_mp3(tts_path)
34
 
35
  # ピッチ変更
36
  sound = change_pitch(sound, pitch_factor)
 
39
  sound = change_speed(sound, speed_factor)
40
 
41
  # エフェクト適用
42
+ sound = apply_effect(sound, effect_type, effect_strength)
43
 
44
  # 一時ファイルに保存
45
+ output_path = tts_path.replace(".mp3", "_modified.mp3")
46
+ sound.export(output_path, format="mp3")
47
 
48
  return output_path
49
 
 
57
  sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
58
  return sped_up_sound.set_frame_rate(44100)
59
 
60
def apply_effect(sound, effect_type, effect_strength=1.0):
    """Apply the named effect to ``sound`` and return the resulting segment.

    Args:
        sound: The audio segment to process (a pydub ``AudioSegment`` in
            this app).
        effect_type: One of the entries of ``EFFECTS``; any unrecognised
            value (including "なし") returns ``sound`` unchanged.
        effect_strength: Multiplier applied to each effect's base parameter
            (filter cutoff, fade length, attenuation, wobble range).
            Defaults to 1.0 so two-argument callers keep working.

    Returns:
        The processed segment, or ``sound`` itself when no effect applies.
    """
    if effect_type == "ふわふわ化":
        return sound.low_pass_filter(1000 * effect_strength)

    if effect_type == "かちかち化":
        return sound.high_pass_filter(3000 * effect_strength)

    if effect_type == "減衰":
        # A fade cannot be longer than the clip itself; strengths above 1.0
        # would otherwise request a fade that starts before the clip does.
        fade_ms = min(len(sound), int(len(sound) * effect_strength))
        # A zero-length fade is a no-op, so skip the call entirely.
        return sound.fade_out(fade_ms) if fade_ms > 0 else sound

    if effect_type == "リバーブ":
        # Fade durations must be whole milliseconds: the strength slider
        # yields floats, and pydub's fade loops over the duration with
        # range(), which rejects floats.
        fade_ms = min(len(sound), int(200 * effect_strength))
        tail = sound.reverse()
        if fade_ms > 0:
            tail = tail.fade_in(fade_ms).fade_out(fade_ms)
        # Append the faded copy after the original, then attenuate the whole
        # result by 10 dB per unit of strength.
        return (sound + tail.reverse()) - (10 * effect_strength)

    if effect_type == "音揺れ":
        return wobble(sound, effect_strength)

    return sound
75
 
76
def wobble(sound, strength):
    """Return ``sound`` with a small random pitch shift on each 200 ms slice.

    Each slice gets an independent pitch factor drawn uniformly from
    ``[1 - 0.03 * strength, 1 + 0.03 * strength]``; the shifted slices are
    concatenated in order.
    """
    slice_ms = 200
    result = AudioSegment.empty()
    for offset in range(0, len(sound), slice_ms):
        # Draw a fresh factor per slice so the pitch wavers slightly.
        factor = np.random.uniform(1 - 0.03 * strength, 1 + 0.03 * strength)
        piece = change_pitch(sound[offset:offset + slice_ms], factor)
        result += piece
    return result
 
99
 
100
  with gr.Row():
101
  effect_dropdown = gr.Dropdown(choices=EFFECTS, value="なし", label="エフェクトを選ぶ")
102
+ effect_strength_slider = gr.Slider(0.1, 3.0, value=1.0, step=0.05, label="エフェクト強さ")
103
 
104
  with gr.Row():
105
  submit_btn = gr.Button("生成する")
 
108
 
109
  submit_btn.click(
110
  fn=generate_tts,
111
+ inputs=[text_input, template_dropdown, pitch_slider, speed_slider, effect_dropdown, effect_strength_slider],
112
  outputs=audio_output
113
  )
114