soheillotfi committed on
Commit
9a4b7bb
·
1 Parent(s): 2088803

remove password

Browse files
Files changed (2) hide show
  1. README.md +2 -3
  2. app.py +87 -30
README.md CHANGED
@@ -41,7 +41,7 @@ Speaker diarization is the process of partitioning an audio stream into homogene
41
  ✅ **Real-time Progress**: Watch processing happen live
42
  ✅ **Speaker Labels**: Get transcripts with "Speaker 1", "Speaker 2" etc.
43
  ✅ **Multiple Outputs**: Download transcript (.txt) and subtitles (.srt)
44
- ✅ **Password Protected**: Controlled access to demo resources
45
 
46
  ## Demo Limitations
47
 
@@ -55,8 +55,7 @@ Speaker diarization is the process of partitioning an audio stream into homogene
55
 
56
  ## How to Use
57
 
58
- 1. **Enter Demo Password** (contact developer if needed)
59
- 2. **Upload Audio File** - keep it under 10MB and 5 minutes
60
  3. **Configure Settings** - choose model and language
61
  4. **Start Processing** - wait for CPU processing to complete
62
  5. **Download Results** - get transcript and subtitle files
 
41
  ✅ **Real-time Progress**: Watch processing happen live
42
  ✅ **Speaker Labels**: Get transcripts with "Speaker 1", "Speaker 2" etc.
43
  ✅ **Multiple Outputs**: Download transcript (.txt) and subtitles (.srt)
44
+ ✅ **Free Access**: Open demo for everyone to try
45
 
46
  ## Demo Limitations
47
 
 
55
 
56
  ## How to Use
57
 
58
+ 1. **Upload Audio File** - keep it under 10MB and 5 minutes
 
59
  3. **Configure Settings** - choose model and language
60
  4. **Start Processing** - wait for CPU processing to complete
61
  5. **Download Results** - get transcript and subtitle files
app.py CHANGED
@@ -13,14 +13,7 @@ DEMO_MODE = True
13
  MAX_FILE_SIZE_MB = 10
14
  ALLOWED_MODELS = ["tiny.en", "base.en", "small.en"]
15
 
16
- def authenticate(password):
17
- """Check if the provided password is valid"""
18
- valid_passwords = [
19
- os.getenv("DEMO_PASSWORD", "whisper2024"),
20
- "demo123",
21
- "whisper_demo"
22
- ]
23
- return password in valid_passwords
24
 
25
  def check_file_size(file_path):
26
  """Check if file is within demo limits"""
@@ -35,12 +28,9 @@ def check_file_size(file_path):
35
  except Exception as e:
36
  return False, f"Error checking file: {str(e)}"
37
 
38
- def run_diarization(audio_file, password, model, language, enable_stemming, suppress_numerals, batch_size, processing_mode, num_speakers):
39
  """Main diarization function"""
40
 
41
- if not authenticate(password):
42
- return None, None, "❌ Invalid password. Please contact the developer for access."
43
-
44
  if not audio_file:
45
  return None, None, "❌ Please upload an audio file."
46
 
@@ -95,16 +85,59 @@ def run_diarization(audio_file, password, model, language, enable_stemming, supp
95
  zip_file.write(file_path, os.path.basename(file_path))
96
  if file_path.endswith('.txt'):
97
  with open(file_path, 'r', encoding='utf-8') as f:
98
- transcript_content = f.read()[:1000]
99
 
100
- return zip_path, transcript_content, f"βœ… Processing complete! Generated {len(output_files)} files."
 
 
101
  else:
102
- return None, None, "❌ No output files generated."
103
  else:
104
- return None, None, f"❌ Processing failed: {stderr}"
105
 
106
  except Exception as e:
107
- return None, None, f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  def update_speaker_visibility(mode):
110
  """Show/hide speaker count based on processing mode"""
@@ -137,7 +170,7 @@ with gr.Blocks(title="🎀 Whisper Speaker Diarization Demo") as demo:
137
 
138
  with gr.Row():
139
  with gr.Column(scale=2):
140
- password_input = gr.Textbox(label="πŸ” Demo Password", placeholder="Enter demo password", type="password")
141
 
142
  audio_input = gr.Audio(label="πŸ“ Upload Audio File (Max 10MB)", type="filepath")
143
  gr.Markdown("*Supported: MP3, WAV, M4A, FLAC, etc.*")
@@ -174,12 +207,11 @@ with gr.Blocks(title="🎀 Whisper Speaker Diarization Demo") as demo:
174
  with gr.Column(scale=1):
175
  gr.Markdown("""
176
  ### πŸ“š How to Use
177
- 1. **Enter password** (contact developer)
178
- 2. **Upload audio** (≀10MB, ≀5min recommended)
179
- 3. **Choose processing mode**
180
- 4. **Configure settings** (optional)
181
- 5. **Click process** and wait
182
- 6. **Download results**
183
 
184
  ### 🎯 Processing Modes
185
  - **Standard**: Traditional speaker diarization
@@ -187,7 +219,28 @@ with gr.Blocks(title="🎀 Whisper Speaker Diarization Demo") as demo:
187
  """)
188
 
189
  download_output = gr.File(label="πŸ“¦ Download Results", visible=False)
190
- transcript_output = gr.Textbox(label="πŸ“„ Transcript Preview", lines=10, visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  result_output = gr.Textbox(label="πŸ“‹ Results", lines=5)
192
 
193
  # Wire up mode visibility
@@ -198,17 +251,21 @@ with gr.Blocks(title="🎀 Whisper Speaker Diarization Demo") as demo:
198
  )
199
 
200
  def process_wrapper(*args):
201
- download_file, transcript, result_text = run_diarization(*args)
 
202
  return (
203
- download_file, transcript or "", result_text or "",
 
 
 
204
  gr.update(visible=download_file is not None),
205
- gr.update(visible=bool(transcript))
206
  )
207
 
208
  process_btn.click(
209
  fn=process_wrapper,
210
- inputs=[audio_input, password_input, model_input, language_input, stemming_input, numerals_input, batch_size_input, processing_mode, num_speakers],
211
- outputs=[download_output, transcript_output, result_output, download_output, transcript_output]
212
  )
213
 
214
  if __name__ == "__main__":
 
13
  MAX_FILE_SIZE_MB = 10
14
  ALLOWED_MODELS = ["tiny.en", "base.en", "small.en"]
15
 
16
+ # Password authentication removed for security
 
 
 
 
 
 
 
17
 
18
  def check_file_size(file_path):
19
  """Check if file is within demo limits"""
 
28
  except Exception as e:
29
  return False, f"Error checking file: {str(e)}"
30
 
31
+ def run_diarization(audio_file, model, language, enable_stemming, suppress_numerals, batch_size, processing_mode, num_speakers):
32
  """Main diarization function"""
33
 
 
 
 
34
  if not audio_file:
35
  return None, None, "❌ Please upload an audio file."
36
 
 
85
  zip_file.write(file_path, os.path.basename(file_path))
86
  if file_path.endswith('.txt'):
87
  with open(file_path, 'r', encoding='utf-8') as f:
88
+ transcript_content = f.read()
89
 
90
+ # Parse transcript for speaker separation
91
+ speaker_1_text, speaker_2_text = parse_speakers(transcript_content)
92
+ return zip_path, speaker_1_text, speaker_2_text, f"βœ… Processing complete! Generated {len(output_files)} files."
93
  else:
94
+ return None, "", "", "❌ No output files generated."
95
  else:
96
+ return None, "", "", f"❌ Processing failed: {stderr}"
97
 
98
  except Exception as e:
99
+ return None, "", "", f"❌ Error: {str(e)}"
100
+
101
+ def parse_speakers(transcript_content):
102
+ """Parse transcript content and separate by speakers"""
103
+ if not transcript_content:
104
+ return "", ""
105
+
106
+ lines = transcript_content.split('\n')
107
+ speaker_1_lines = []
108
+ speaker_2_lines = []
109
+
110
+ for line in lines:
111
+ line = line.strip()
112
+ if not line:
113
+ continue
114
+
115
+ # Look for speaker labels (common formats)
116
+ if line.startswith('SPEAKER_00') or line.startswith('Speaker 0') or line.startswith('[SPEAKER_00]'):
117
+ speaker_1_lines.append(line)
118
+ elif line.startswith('SPEAKER_01') or line.startswith('Speaker 1') or line.startswith('[SPEAKER_01]'):
119
+ speaker_2_lines.append(line)
120
+ else:
121
+ # If no clear speaker label, try to detect from content
122
+ if 'speaker' in line.lower():
123
+ if '0' in line or 'one' in line.lower() or 'first' in line.lower():
124
+ speaker_1_lines.append(line)
125
+ elif '1' in line or 'two' in line.lower() or 'second' in line.lower():
126
+ speaker_2_lines.append(line)
127
+ else:
128
+ # Default to speaker 1 if unclear
129
+ speaker_1_lines.append(line)
130
+ else:
131
+ # If no speaker indication, add to both or alternate
132
+ if len(speaker_1_lines) <= len(speaker_2_lines):
133
+ speaker_1_lines.append(line)
134
+ else:
135
+ speaker_2_lines.append(line)
136
+
137
+ speaker_1_text = '\n'.join(speaker_1_lines) if speaker_1_lines else "No content detected for Speaker 1"
138
+ speaker_2_text = '\n'.join(speaker_2_lines) if speaker_2_lines else "No content detected for Speaker 2"
139
+
140
+ return speaker_1_text, speaker_2_text
141
 
142
  def update_speaker_visibility(mode):
143
  """Show/hide speaker count based on processing mode"""
 
170
 
171
  with gr.Row():
172
  with gr.Column(scale=2):
173
+ # Password input removed for security
174
 
175
  audio_input = gr.Audio(label="πŸ“ Upload Audio File (Max 10MB)", type="filepath")
176
  gr.Markdown("*Supported: MP3, WAV, M4A, FLAC, etc.*")
 
207
  with gr.Column(scale=1):
208
  gr.Markdown("""
209
  ### πŸ“š How to Use
210
+ 1. **Upload audio** (≀10MB, ≀5min recommended)
211
+ 2. **Choose processing mode**
212
+ 3. **Configure settings** (optional)
213
+ 4. **Click process** and wait
214
+ 5. **Download results**
 
215
 
216
  ### 🎯 Processing Modes
217
  - **Standard**: Traditional speaker diarization
 
219
  """)
220
 
221
  download_output = gr.File(label="πŸ“¦ Download Results", visible=False)
222
+
223
+ # Separate transcript windows for each speaker
224
+ with gr.Row(visible=False) as transcript_row:
225
+ with gr.Column():
226
+ speaker1_output = gr.Textbox(
227
+ label="πŸ—£οΈ Speaker 1 Transcript",
228
+ lines=15,
229
+ max_lines=20,
230
+ show_copy_button=True,
231
+ container=True,
232
+ interactive=False
233
+ )
234
+ with gr.Column():
235
+ speaker2_output = gr.Textbox(
236
+ label="πŸ—£οΈ Speaker 2 Transcript",
237
+ lines=15,
238
+ max_lines=20,
239
+ show_copy_button=True,
240
+ container=True,
241
+ interactive=False
242
+ )
243
+
244
  result_output = gr.Textbox(label="πŸ“‹ Results", lines=5)
245
 
246
  # Wire up mode visibility
 
251
  )
252
 
253
  def process_wrapper(*args):
254
+ download_file, speaker1_text, speaker2_text, result_text = run_diarization(*args)
255
+ has_transcripts = bool(speaker1_text or speaker2_text)
256
  return (
257
+ download_file,
258
+ speaker1_text or "",
259
+ speaker2_text or "",
260
+ result_text or "",
261
  gr.update(visible=download_file is not None),
262
+ gr.update(visible=has_transcripts)
263
  )
264
 
265
  process_btn.click(
266
  fn=process_wrapper,
267
+ inputs=[audio_input, model_input, language_input, stemming_input, numerals_input, batch_size_input, processing_mode, num_speakers],
268
+ outputs=[download_output, speaker1_output, speaker2_output, result_output, download_output, transcript_row]
269
  )
270
 
271
  if __name__ == "__main__":