Spaces:

soheillotfi
/

whisper-diarization-demo

Sleeping

App Files Files Community

soheillotfi committed on Aug 24

Commit

9a4b7bb

1 Parent(s): 2088803

remove password

Browse files

Files changed (2) hide show

README.md +2 -3
app.py +87 -30

README.md CHANGED Viewed

@@ -41,7 +41,7 @@ Speaker diarization is the process of partitioning an audio stream into homogene
 ✅ **Real-time Progress**: Watch processing happen live
 ✅ **Speaker Labels**: Get transcripts with "Speaker 1", "Speaker 2" etc.
 ✅ **Multiple Outputs**: Download transcript (.txt) and subtitles (.srt)
-✅ **Password Protected**: Controlled access to demo resources
 ## Demo Limitations
@@ -55,8 +55,7 @@ Speaker diarization is the process of partitioning an audio stream into homogene
 ## How to Use
-1. **Enter Demo Password** (contact developer if needed)
-2. **Upload Audio File** - keep it under 10MB and 5 minutes
 3. **Configure Settings** - choose model and language
 4. **Start Processing** - wait for CPU processing to complete
 5. **Download Results** - get transcript and subtitle files

 ✅ **Real-time Progress**: Watch processing happen live
 ✅ **Speaker Labels**: Get transcripts with "Speaker 1", "Speaker 2" etc.
 ✅ **Multiple Outputs**: Download transcript (.txt) and subtitles (.srt)
+✅ **Free Access**: Open demo for everyone to try
 ## Demo Limitations
 ## How to Use
+1. **Upload Audio File** - keep it under 10MB and 5 minutes
 3. **Configure Settings** - choose model and language
 4. **Start Processing** - wait for CPU processing to complete
 5. **Download Results** - get transcript and subtitle files

app.py CHANGED Viewed

@@ -13,14 +13,7 @@ DEMO_MODE = True
 MAX_FILE_SIZE_MB = 10
 ALLOWED_MODELS = ["tiny.en", "base.en", "small.en"]
-def authenticate(password):
-    """Check if the provided password is valid"""
-    valid_passwords = [
-        os.getenv("DEMO_PASSWORD", "whisper2024"),
-        "demo123",
-        "whisper_demo"
-    ]
-    return password in valid_passwords
 def check_file_size(file_path):
     """Check if file is within demo limits"""
@@ -35,12 +28,9 @@ def check_file_size(file_path):
     except Exception as e:
         return False, f"Error checking file: {str(e)}"
-def run_diarization(audio_file, password, model, language, enable_stemming, suppress_numerals, batch_size, processing_mode, num_speakers):
     """Main diarization function"""
-    if not authenticate(password):
-        return None, None, "❌ Invalid password. Please contact the developer for access."
     if not audio_file:
         return None, None, "❌ Please upload an audio file."
@@ -95,16 +85,59 @@ def run_diarization(audio_file, password, model, language, enable_stemming, supp
                         zip_file.write(file_path, os.path.basename(file_path))
                         if file_path.endswith('.txt'):
                             with open(file_path, 'r', encoding='utf-8') as f:
-                                transcript_content = f.read()[:1000]
-                return zip_path, transcript_content, f"✅ Processing complete! Generated {len(output_files)} files."
             else:
-                return None, None, "❌ No output files generated."
         else:
-            return None, None, f"❌ Processing failed: {stderr}"
     except Exception as e:
-        return None, None, f"❌ Error: {str(e)}"
 def update_speaker_visibility(mode):
     """Show/hide speaker count based on processing mode"""
@@ -137,7 +170,7 @@ with gr.Blocks(title="🎤 Whisper Speaker Diarization Demo") as demo:
     with gr.Row():
         with gr.Column(scale=2):
-            password_input = gr.Textbox(label="🔐 Demo Password", placeholder="Enter demo password", type="password")
             audio_input = gr.Audio(label="📁 Upload Audio File (Max 10MB)", type="filepath")
             gr.Markdown("*Supported: MP3, WAV, M4A, FLAC, etc.*")
@@ -174,12 +207,11 @@ with gr.Blocks(title="🎤 Whisper Speaker Diarization Demo") as demo:
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 📚 How to Use
-            1. **Enter password** (contact developer)
-            2. **Upload audio** (≤10MB, ≤5min recommended)
-            3. **Choose processing mode**
-            4. **Configure settings** (optional)
-            5. **Click process** and wait
-            6. **Download results**
             ### 🎯 Processing Modes
             - **Standard**: Traditional speaker diarization
@@ -187,7 +219,28 @@ with gr.Blocks(title="🎤 Whisper Speaker Diarization Demo") as demo:
             """)
     download_output = gr.File(label="📦 Download Results", visible=False)
-    transcript_output = gr.Textbox(label="📄 Transcript Preview", lines=10, visible=False)
     result_output = gr.Textbox(label="📋 Results", lines=5)
     # Wire up mode visibility
@@ -198,17 +251,21 @@ with gr.Blocks(title="🎤 Whisper Speaker Diarization Demo") as demo:
     )
     def process_wrapper(*args):
-        download_file, transcript, result_text = run_diarization(*args)
         return (
-            download_file, transcript or "", result_text or "",
             gr.update(visible=download_file is not None),
-            gr.update(visible=bool(transcript))
         )
     process_btn.click(
         fn=process_wrapper,
-        inputs=[audio_input, password_input, model_input, language_input, stemming_input, numerals_input, batch_size_input, processing_mode, num_speakers],
-        outputs=[download_output, transcript_output, result_output, download_output, transcript_output]
     )
 if __name__ == "__main__":

 MAX_FILE_SIZE_MB = 10
 ALLOWED_MODELS = ["tiny.en", "base.en", "small.en"]
+# Password authentication removed: the hard-coded demo passwords are gone and the demo is open access
 def check_file_size(file_path):
     """Check if file is within demo limits"""
     except Exception as e:
         return False, f"Error checking file: {str(e)}"
+def run_diarization(audio_file, model, language, enable_stemming, suppress_numerals, batch_size, processing_mode, num_speakers):
     """Main diarization function"""
     if not audio_file:
         return None, None, "❌ Please upload an audio file."
                         zip_file.write(file_path, os.path.basename(file_path))
                         if file_path.endswith('.txt'):
                             with open(file_path, 'r', encoding='utf-8') as f:
+                                transcript_content = f.read()
+                # Parse transcript for speaker separation
+                speaker_1_text, speaker_2_text = parse_speakers(transcript_content)
+                return zip_path, speaker_1_text, speaker_2_text, f"✅ Processing complete! Generated {len(output_files)} files."
             else:
+                return None, "", "", "❌ No output files generated."
         else:
+            return None, "", "", f"❌ Processing failed: {stderr}"
     except Exception as e:
+        return None, "", "", f"❌ Error: {str(e)}"
def parse_speakers(transcript_content):
    """Split a diarized transcript into the two speaker panes shown in the UI.

    A line is considered labelled when it *starts* with a speaker tag such as
    ``SPEAKER_00``, ``[SPEAKER_01]`` or ``Speaker 1`` (case-insensitive).
    Lines without a tag are treated as a continuation of the most recently
    labelled speaker, so multi-line turns stay together instead of being
    spread across both panes.

    Args:
        transcript_content: Full transcript text, or a falsy value
            (``None`` / ``""``) when no transcript is available.

    Returns:
        A ``(speaker_1_text, speaker_2_text)`` tuple of newline-joined lines.
        Both items are ``""`` when ``transcript_content`` is falsy; otherwise
        an empty pane is replaced by a "No content detected ..." placeholder.
    """
    # Function-scope import: the file's top-level import block is not visible
    # from this block, so the dependency is kept local.
    import re

    if not transcript_content:
        return "", ""

    # Anchored at the start of the line so that a sentence merely mentioning
    # the word "speaker" or containing a digit is never mistaken for a label.
    # The greedy ``\d+`` captures the whole index, so "Speaker 10" is read as
    # 10 rather than being confused with "Speaker 1".
    label_pattern = re.compile(r"^\[?speaker[_ ]?(\d+)", re.IGNORECASE)

    panes = ([], [])  # panes[0] -> "Speaker 1" box, panes[1] -> "Speaker 2" box
    current = 0       # text seen before any label goes to the first pane

    for raw_line in transcript_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        match = label_pattern.match(line)
        if match:
            # Index 1 feeds the second pane. Index 0 and any other index
            # fall back to the first pane, because only two panes exist;
            # the line keeps its own label text, so its origin stays visible.
            current = 1 if int(match.group(1)) == 1 else 0

        panes[current].append(line)

    speaker_1_text = '\n'.join(panes[0]) if panes[0] else "No content detected for Speaker 1"
    speaker_2_text = '\n'.join(panes[1]) if panes[1] else "No content detected for Speaker 2"
    return speaker_1_text, speaker_2_text
 def update_speaker_visibility(mode):
     """Show/hide speaker count based on processing mode"""
     with gr.Row():
         with gr.Column(scale=2):
+            # Password input removed: the demo no longer requires a password
             audio_input = gr.Audio(label="📁 Upload Audio File (Max 10MB)", type="filepath")
             gr.Markdown("*Supported: MP3, WAV, M4A, FLAC, etc.*")
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 📚 How to Use
+            1. **Upload audio** (≤10MB, ≤5min recommended)
+            2. **Choose processing mode**
+            3. **Configure settings** (optional)
+            4. **Click process** and wait
+            5. **Download results**
             ### 🎯 Processing Modes
             - **Standard**: Traditional speaker diarization
             """)
     download_output = gr.File(label="📦 Download Results", visible=False)
+    # Separate transcript windows for each speaker
+    with gr.Row(visible=False) as transcript_row:
+        with gr.Column():
+            speaker1_output = gr.Textbox(
+                label="🗣️ Speaker 1 Transcript",
+                lines=15,
+                max_lines=20,
+                show_copy_button=True,
+                container=True,
+                interactive=False
+            )
+        with gr.Column():
+            speaker2_output = gr.Textbox(
+                label="🗣️ Speaker 2 Transcript",
+                lines=15,
+                max_lines=20,
+                show_copy_button=True,
+                container=True,
+                interactive=False
+            )
     result_output = gr.Textbox(label="📋 Results", lines=5)
     # Wire up mode visibility
     )
     def process_wrapper(*args):
+        download_file, speaker1_text, speaker2_text, result_text = run_diarization(*args)
+        has_transcripts = bool(speaker1_text or speaker2_text)
         return (
+            download_file,
+            speaker1_text or "",
+            speaker2_text or "",
+            result_text or "",
             gr.update(visible=download_file is not None),
+            gr.update(visible=has_transcripts)
         )
     process_btn.click(
         fn=process_wrapper,
+        inputs=[audio_input, model_input, language_input, stemming_input, numerals_input, batch_size_input, processing_mode, num_speakers],
+        outputs=[download_output, speaker1_output, speaker2_output, result_output, download_output, transcript_row]
     )
 if __name__ == "__main__":