NeuralFalcon committed on
Commit
0a26c6f
·
verified ·
1 Parent(s): d40ba37

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -0
app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from subtitle import subtitle_maker,LANGUAGE_CODE
3
# Languages pinned to the top of the dropdowns; every other entry comes from
# the keys of the LANGUAGE_CODE mapping imported from `subtitle`.
_PINNED_LANGUAGES = ["English", "Hindi", "Bengali"]
available_language = LANGUAGE_CODE.keys()

# dict.fromkeys() drops repeats while keeping first-seen order, so a pinned
# language that is also a LANGUAGE_CODE key is listed once (at the top)
# instead of appearing twice in the dropdown.
source_lang_list = list(dict.fromkeys(["Automatic", *_PINNED_LANGUAGES, *available_language]))

target_lang_list = list(dict.fromkeys([*_PINNED_LANGUAGES, *available_language]))
9
+
10
+
11
def update_target_lang(selected_src):
    """Return the target-language value that should follow a source change.

    The "Automatic" source choice maps to "English"; any other selection is
    passed through unchanged so the target mirrors the source.
    """
    return "English" if selected_src == "Automatic" else selected_src
17
+
18
+
19
def ui1():
    """Build the "generate subtitles" Gradio page and return it (not launched).

    The page has a title, an upload hint, and one row with two columns:
    inputs on the left (media file, source language, generate button and a
    collapsed translation-target dropdown) and the generated files on the
    right.

    Returns:
        The `gr.Blocks` object holding the layout and its event wiring.
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a flattened diff view -- confirm which components belong inside
    # each Accordion against the real file.
    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 32px;'>🎬 Auto Subtitle Generator</h1></center>")
        gr.Markdown("**Note**: If you have a large video, upload the audio instead, it's much faster to upload.")

        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column():
                upload_media = gr.File(label="Upload Audio or Video File")
                input_lang = gr.Dropdown(label="Source Language", choices=source_lang_list, value="English")
                generate_btn = gr.Button("🚀 Generate Subtitle", variant="primary")
                with gr.Accordion("⚙️ Translation using Google Translator", open=False):
                    output_lang = gr.Dropdown(label="Translate Into", choices=target_lang_list, value="English")

            # Right column: files produced by subtitle_maker.
            with gr.Column():
                default_srt = gr.File(label="🎯 Original Subtitles (Default Generated by whisper)")
                customized_srt = gr.File(label="📝 Readable Subtitles (Multi-line)")
                word_level_srt = gr.File(label="🔠 Word-by-Word Subtitles")

                with gr.Accordion("🌍 Other Subtitle Formats", open=False):
                    translated_srt = gr.File(label="🌐 Translated Subtitles")
                    shorts_srt = gr.File(label="📱 Shorts/Reels Subtitles")
                    transcript_txt = gr.File(label="📄 Full Transcript (Text File)")
                    transcript_box = gr.Textbox(label="🗒️ Transcript Preview", lines=3)

        # The outputs list is NOT in on-screen order (translated_srt comes
        # second); presumably it mirrors the order of subtitle_maker's return
        # values -- verify against subtitle.py before reordering.
        generate_btn.click(
            fn=subtitle_maker,
            inputs=[upload_media, input_lang, output_lang],
            outputs=[default_srt, translated_srt, customized_srt, word_level_srt, shorts_srt, transcript_txt, transcript_box]
        )

        # Keep the translation target in step with the chosen source language.
        input_lang.change(
            fn=update_target_lang,
            inputs=input_lang,
            outputs=output_lang
        )

        # Add example runs (optional)
        # gr.Examples(
        #     examples=[
        #         ["sample.mp3", "English", "English"],
        #         ["sample.wav", "English", "Hindi"],
        #     ],
        #     inputs=[upload_media, input_lang, output_lang]
        # )

    return demo
66
+
67
+
68
+
69
def prompt_translation(language):
    """
    Build the LLM instructions for a dubbing-friendly subtitle translation.

    The returned text starts with a newline and a dashed separator so it can
    be appended directly after raw subtitle text (as prompt_maker does).

    Args:
        language: Name of the target language; interpolated into the prompt.

    Returns:
        The prompt as a single string.
    """
    # Doubled braces ({{ }}) are f-string escapes that emit literal JSON braces.
    # The JSON example says "original subtitle line" (no hard-coded language)
    # because the uploaded subtitles are not necessarily English, and the
    # guidelines are numbered consecutively (the last one used to be "6.").
    prompt = f"""
-------------- You are a professional subtitle translator for **video dubbing**.
Translate the following `.srt` subtitle file into **{language}** while preserving timing, meaning, and emotional tone.

Output in JSON format exactly like this:

```json
{{
"subtitle sequence number": {{
"timestamp": "original timestamp",
"actual subtitle text": "original subtitle line",
"dubbing": "natural, dubbing-friendly {language} translation"
}}
}}
```

**Guidelines for Translation:**

1. **Understand the full context** before translating — read the entire subtitle file first.
2. Translate into **natural, conversational {language}**, not a direct word-for-word translation.
3. Keep translations **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt
97
+
98
+
99
def prompt_fix_grammar(language="English"):
    """
    Build the LLM instructions for a dubbing-friendly grammar clean-up.

    The returned text starts with a newline and a dashed separator so it can
    be appended directly after raw subtitle text (as prompt_maker does).

    Args:
        language: Language the subtitles are written in and must stay in;
            interpolated into the prompt. Defaults to "English".

    Returns:
        The prompt as a single string.
    """
    # Doubled braces ({{ }}) are f-string escapes that emit literal JSON braces.
    # Guidelines are numbered consecutively (the last one used to be "4.").
    prompt = f"""
-------------- You are a professional subtitle editor for **video dubbing**.
Fix the grammar, spelling, and awkward phrasing in the following `.srt` subtitle file while preserving timing, meaning, and emotional tone.  
Do NOT translate — keep everything in {language}.

Output in JSON format exactly like this:

```json
{{
"subtitle sequence number": {{
"timestamp": "original timestamp",
"actual subtitle text": "original {language} subtitle line",
"dubbing": "natural, dubbing-friendly corrected {language} line"
}}
}}
```

**Guidelines for Grammar Fixing:**

1. **Understand the full context** before editing — read the entire subtitle file first.
2. Correct grammar, spelling, and phrasing errors while keeping the same meaning.
3. Keep corrections **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt
128
+
129
+
130
+ import pysrt
131
+
132
def prompt_maker(srt_path, target_language, task="Translation"):
    """Turn an .srt file into a ready-to-paste LLM prompt.

    Every subtitle is written as "index / start --> end / text", followed by
    the instruction block for the chosen task. The same text is also saved
    next to the input as a `.txt` file.

    Args:
        srt_path: Path of the subtitle file to read.
        target_language: Language name forwarded to the prompt builder.
        task: "Translation" selects prompt_translation; any other value
            selects prompt_fix_grammar.

    Returns:
        A `(prompt_text, srt_path)` tuple; the path is handed back unchanged
        so the caller can keep it for naming the converted file later.
    """
    import os  # local import: keeps this function independent of import order

    # Swap only the real extension. str.replace(".srt", ".txt") would also
    # rewrite ".srt" inside directory names, and would leave the path
    # unchanged for e.g. ".SRT" -- making the prompt overwrite the subtitles.
    root, _ = os.path.splitext(srt_path)
    txt_path = root + ".txt"
    subs = pysrt.open(srt_path, encoding='utf-8')

    parts = [f"{sub.index}\n{sub.start} --> {sub.end}\n{sub.text}\n\n" for sub in subs]
    if task == "Translation":
        parts.append(prompt_translation(target_language))
    else:
        parts.append(prompt_fix_grammar(target_language))
    content = "".join(parts)

    # Build the text once in memory and write it out, rather than writing the
    # file piecemeal and reading it straight back.
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # return both prompt text and original path
    return content, srt_path
151
+
152
+
153
+
154
+
155
+ import pysrt
156
+ import json
157
+ import os
158
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence (``` or ```json)."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    # Drop the whole opening fence line, including any language tag.
    _, _, text = text.partition("\n")
    text = text.rstrip()
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def json_to_srt(json_script, srt_path):
    """
    Convert dubbing-friendly JSON back into .srt

    The output is written to ./dubbing_srt/<name>_dubbing.srt, where <name>
    is the base name of `srt_path` without its extension. The directory is
    created if missing.

    Args:
        json_script: Either a JSON string or an already-parsed mapping whose
            values each have "timestamp" and "dubbing" entries. A string may
            be wrapped in a Markdown code fence, since the prompts ask the
            model to answer inside one.
        srt_path: Path of the original subtitle file; used only for naming.

    Returns:
        Path of the written .srt file.

    Raises:
        json.JSONDecodeError: If a string input is not valid JSON.
        KeyError: If an entry lacks "timestamp" or "dubbing".
    """
    os.makedirs("./dubbing_srt", exist_ok=True)

    name_no_ext, _ = os.path.splitext(os.path.basename(srt_path))
    output_srt_path = os.path.join("./dubbing_srt", f"{name_no_ext}_dubbing.srt")

    # Load JSON
    if isinstance(json_script, str):
        json_object = json.loads(_strip_code_fence(json_script))
    else:
        json_object = json_script

    # Entries are renumbered from 1 in iteration order; the JSON keys
    # themselves are not used.
    with open(output_srt_path, "w", encoding="utf-8") as f:
        for i, value in enumerate(json_object.values(), start=1):
            f.write(f"{i}\n{value['timestamp']}\n{value['dubbing']}\n\n")

    return output_srt_path
183
+
184
+
185
+
186
+
187
def ui2():
    """Build the "translate subtitles with an LLM" Gradio page and return it.

    The workflow is manual: step 1 turns an uploaded .srt into a prompt the
    user copies into an external LLM; step 2 takes the JSON the user pastes
    back and converts it into a new .srt file.

    Returns:
        The `gr.Blocks` object holding the layout and its event wiring
        (not launched).
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a flattened diff view -- confirm against the real file.
    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 32px;'>🎬 Subtitle Translation Using LLM</h1></center>")

        # hidden state to keep original srt path
        srt_state = gr.State("")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Step 1: Generate Prompt")
                srt_file = gr.File(label="Upload .srt file generated by Whisper", file_types=[".srt"])
                # prompt_maker only compares this value with "Translation";
                # any other choice selects the grammar-fix prompt.
                task = gr.Dropdown(
                    ["Translation", "Fix Grammar [English to English for dubbing]"],
                    label="Select Task",
                    value="Translation",
                )
                language = gr.Dropdown(target_lang_list, label="Select the language you want to translate into", value="English")
                generate_btn = gr.Button("Generate Prompt")
                output_prompt = gr.Textbox(
                    label="Copy & Paste this prompt in https://aistudio.google.com/",
                    lines=20
                )

            with gr.Column():
                gr.Markdown("### Step 2: Paste JSON & Convert Back to SRT")
                json_input = gr.Textbox(
                    label="Paste JSON script from https://aistudio.google.com/ ",
                    lines=20,
                    placeholder="Paste the JSON output here..."
                )
                convert_btn = gr.Button("Convert JSON → SRT")
                srt_file_out = gr.File(label="Download new .srt")

        # Button actions
        # prompt_maker returns (prompt_text, srt_path): the text fills the
        # prompt box and the path is stored in srt_state for step 2.
        generate_btn.click(
            fn=prompt_maker,
            inputs=[srt_file, language, task],
            outputs=[output_prompt, srt_state],
        )

        # json_to_srt uses the stored path only to name the output file.
        convert_btn.click(
            fn=json_to_srt,
            inputs=[json_input, srt_state],
            outputs=srt_file_out,
        )

    return demo
234
+
235
+
236
+
237
+
238
+ import click
239
# Command-line entry point: builds both pages, shows them as two tabs of one
# app and launches it. Documented with comments rather than a docstring
# because click would display a docstring as the command's --help text.
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(share, debug):
    # Pages are built in tab order: subtitle generation first, then translation.
    pages = [ui1(), ui2()]
    tab_titles = ["Generate SRT File", "SRT Translation"]
    app = gr.TabbedInterface(pages, tab_titles, title="")
    app.queue().launch(share=share, debug=debug)


if __name__ == "__main__":
    main()