Spaces:

Agents-MCP-Hackathon
/

Narrated_Slideshow_Generator

Sleeping

App Files Files Community

cp557 committed on Jun 17

Commit

7f8dde5

verified ·

1 Parent(s): 866d021

Update generate_slideshow.py

Browse files

Files changed (1) hide show

generate_slideshow.py +23 -25

generate_slideshow.py CHANGED Viewed

@@ -1,9 +1,8 @@
 #!/usr/bin/env python3
 """
 Generates slide markdown plus TTS audio and images using Gemini models.
 Functions exposed:
-    generate_slideshow_with_audio(topic) -> (list_of_slide_markdown, list_of_audio_paths, list_of_image_paths)
 """
 import asyncio
@@ -38,7 +37,7 @@ except ImportError:
     print("Deepgram SDK not available. Install with 'pip install deepgram-sdk'")
     DEEPGRAM_AVAILABLE = False
-GEMINI_API_KEY = os.environ.get("GEMINI_KEY")
 DEEPGRAM_KEY = os.environ.get("DEEPGRAM_KEY")
 # Dictionary to store temporary directories for cleanup
@@ -175,9 +174,9 @@ def _extract_markdown_slides(markdown: str) -> list[dict]:
 # ──────────────────────────── Gemini Calls ───────────────────────────
-async def _generate_image(prompt: str, output_path: Path) -> str:
     """Generate an image using Gemini Imagen model and save it to the specified path."""
-    client = genai.Client(api_key=GEMINI_API_KEY)
     try:
         # Make this call in a separate thread to not block the event loop
@@ -210,9 +209,9 @@ async def _generate_image(prompt: str, output_path: Path) -> str:
         print(f"Error generating image: {e}")
         return ""
-def _generate_slideshow_markdown(topic: str) -> str:
     """Ask Gemini 2.5 Flash for a markdown deck following strict rules."""
-    client = genai.Client(api_key=GEMINI_API_KEY)
     #model = "gemini-2.5-flash-preview-05-20"
     model = "gemini-2.5-pro-preview-06-05"
@@ -220,21 +219,17 @@ def _generate_slideshow_markdown(topic: str) -> str:
 <role>
 You are SlideGen, an AI that creates fun and engaging narrated slide decks with visual elements about various topics.
 </role>
 <instructions>
 Create a presentation about '{topic}'.
 Include:
 - An introduction slide with bullet points about the overview of the presentation topic and the key areas that will be covered
 - 3 content slides with bullet points
 - A conclusion slide with bullet points summarizing the key points and insights.
 For each slide provide:
 1. Each title should be a single concise and coherent phrase accompanied by exactly one relevant emoji. (Do NOT use the colon ":" format for titles)
 2. 3-4 concise bullet points, you will go into more detail in the speaker notes.
 3. Clear prose speaker notes suitable for narration that is accessible to general audiences
 4. A detailed and specific image prompt for an AI image generator that is relevent to the slide's content. Do not include any text in the image.
 Respond with a JSON array where each element represents a slide in the following format:
 ```json
 [
@@ -275,9 +270,9 @@ Respond with a JSON array where each element represents a slide in the following
     return response.text.strip()
-async def _generate_tts(narration: str, out_path: Path):
     """GenAI TTS → WAV - Async version with fallback model support"""
-    client = genai.Client(api_key=GEMINI_API_KEY)
     # Try with flash model first, then fall back to pro model if needed
     models_to_try = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-06-05"]
@@ -387,17 +382,22 @@ def _generate_tts_with_deepgram(narration: str, out_path: Path):
 # ──────────────────────── Public Entry Point ───────────────────
-async def generate_slideshow_with_audio_async(topic: str, **kwargs):
     """
     Async version of generate_slideshow_with_audio that processes slides concurrently.
     Returns:
         slides_md : list[str]     – markdown for each slide
         audio     : list[str]     – file paths (one per slide, same order)
         images    : list[str|None] – file paths for slide images (one per slide, same order)
     """
     # Get JSON response from Gemini
-    json_response = _generate_slideshow_markdown(topic)
     # Parse JSON into slides data
     slides_data = _parse_slides_json(json_response)
@@ -439,7 +439,7 @@ async def generate_slideshow_with_audio_async(topic: str, **kwargs):
         # Schedule TTS task
         if narration:
             print(f"Scheduling TTS for slide {i} -> {wav_path}")
-            tts_tasks.append(_generate_tts(narration, wav_path))
         else:
             # Create empty placeholder WAV if no narration
             with open(wav_path, "wb") as f:
@@ -457,7 +457,7 @@ async def generate_slideshow_with_audio_async(topic: str, **kwargs):
             image_path = pres_dir / f"{safe_topic}_slide_{i:02d}_image.jpg"
             print(f"Scheduling image for slide {i} -> {image_path}")
             # Store task with index to track which slide it belongs to
-            image_tasks.append((i-1, _generate_image(image_prompt, image_path)))
         else:
             print(f"No image prompt for slide {i}, skipping image generation.")
@@ -491,13 +491,14 @@ async def generate_slideshow_with_audio_async(topic: str, **kwargs):
     return slides_md, audio_files, slide_images
-def generate_slideshow_with_audio(topic: str, **kwargs):
     """
     Synchronous wrapper for the async slideshow generation function.
     Maintains backward compatibility with existing code.
     Args:
         topic: The topic to generate a slideshow about
         **kwargs: Optional parameters including:
             - session_id: Unique identifier for the user session
@@ -506,25 +507,22 @@ def generate_slideshow_with_audio(topic: str, **kwargs):
         audio     : list[str]     – file paths (one per slide, same order)
         images    : list[str|None] – file paths for slide images (one per slide, same order)
     """
-    return asyncio.run(generate_slideshow_with_audio_async(topic, **kwargs))
-def validate_topic(topic: str) -> bool:
     """Use Gemini Flash Preview to determine if a topic is suitable for a slideshow."""
-    client = genai.Client(api_key=GEMINI_API_KEY)
     system_prompt = f'''
 <role>
 You are SlideGenInputGuard, an AI assistant that determines if a user input is a suitable topic for a narrated slideshow presentation.
 </role>
 <instructions>
 Evaluate if "{topic}" is a real-world topic, question, or concept suitable for an educational slideshow. It is fine to include topics that are silly and not real-world topics.
 If it is a valid topic, respond with exactly: 1
 If it is nonsense, gibberish, meaningless, empty, or not a valid topic, respond with exactly: 0
 Only respond with a single digit: 1 or 0. No spaces, newlines or explanations. JUST THE NUMBER 1 OR 0.
 </instructions>
 <examples>
 Input:How does lightning form?
 Output:1
@@ -559,4 +557,4 @@ Output:0
         config=types.GenerateContentConfig(response_mime_type="text/plain", temperature=0),
     )
     result = response.text.strip()
-    return result == "1"

 #!/usr/bin/env python3
 """
 Generates slide markdown plus TTS audio and images using Gemini models.
 Functions exposed:
+    generate_slideshow_with_audio(topic, api_key) -> (list_of_slide_markdown, list_of_audio_paths, list_of_image_paths)
 """
 import asyncio
     print("Deepgram SDK not available. Install with 'pip install deepgram-sdk'")
     DEEPGRAM_AVAILABLE = False
+# No global Gemini API key: each function receives api_key as a parameter
 DEEPGRAM_KEY = os.environ.get("DEEPGRAM_KEY")
 # Dictionary to store temporary directories for cleanup
 # ──────────────────────────── Gemini Calls ───────────────────────────
+async def _generate_image(prompt: str, output_path: Path, api_key: str) -> str:
     """Generate an image using Gemini Imagen model and save it to the specified path."""
+    client = genai.Client(api_key=api_key)
     try:
         # Make this call in a separate thread to not block the event loop
         print(f"Error generating image: {e}")
         return ""
+def _generate_slideshow_markdown(topic: str, api_key: str) -> str:
     """Ask Gemini 2.5 Flash for a markdown deck following strict rules."""
+    client = genai.Client(api_key=api_key)
     #model = "gemini-2.5-flash-preview-05-20"
     model = "gemini-2.5-pro-preview-06-05"
 <role>
 You are SlideGen, an AI that creates fun and engaging narrated slide decks with visual elements about various topics.
 </role>
 <instructions>
 Create a presentation about '{topic}'.
 Include:
 - An introduction slide with bullet points about the overview of the presentation topic and the key areas that will be covered
 - 3 content slides with bullet points
 - A conclusion slide with bullet points summarizing the key points and insights.
 For each slide provide:
 1. Each title should be a single concise and coherent phrase accompanied by exactly one relevant emoji. (Do NOT use the colon ":" format for titles)
 2. 3-4 concise bullet points, you will go into more detail in the speaker notes.
 3. Clear prose speaker notes suitable for narration that is accessible to general audiences
 4. A detailed and specific image prompt for an AI image generator that is relevent to the slide's content. Do not include any text in the image.
 Respond with a JSON array where each element represents a slide in the following format:
 ```json
 [
     return response.text.strip()
+async def _generate_tts(narration: str, out_path: Path, api_key: str):
     """GenAI TTS → WAV - Async version with fallback model support"""
+    client = genai.Client(api_key=api_key)
     # Try with flash model first, then fall back to pro model if needed
     models_to_try = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-06-05"]
 # ──────────────────────── Public Entry Point ───────────────────
+async def generate_slideshow_with_audio_async(topic: str, api_key: str, **kwargs):
     """
     Async version of generate_slideshow_with_audio that processes slides concurrently.
+    Args:
+        topic: The topic to generate a slideshow about
+        api_key: Gemini API key
+        **kwargs: Optional parameters including session_id
     Returns:
         slides_md : list[str]     – markdown for each slide
         audio     : list[str]     – file paths (one per slide, same order)
         images    : list[str|None] – file paths for slide images (one per slide, same order)
     """
     # Get JSON response from Gemini
+    json_response = _generate_slideshow_markdown(topic, api_key)
     # Parse JSON into slides data
     slides_data = _parse_slides_json(json_response)
         # Schedule TTS task
         if narration:
             print(f"Scheduling TTS for slide {i} -> {wav_path}")
+            tts_tasks.append(_generate_tts(narration, wav_path, api_key))
         else:
             # Create empty placeholder WAV if no narration
             with open(wav_path, "wb") as f:
             image_path = pres_dir / f"{safe_topic}_slide_{i:02d}_image.jpg"
             print(f"Scheduling image for slide {i} -> {image_path}")
             # Store task with index to track which slide it belongs to
+            image_tasks.append((i-1, _generate_image(image_prompt, image_path, api_key)))
         else:
             print(f"No image prompt for slide {i}, skipping image generation.")
     return slides_md, audio_files, slide_images
+def generate_slideshow_with_audio(topic: str, api_key: str, **kwargs):
     """
     Synchronous wrapper for the async slideshow generation function.
     Maintains backward compatibility with existing code.
     Args:
         topic: The topic to generate a slideshow about
+        api_key: Gemini API key
         **kwargs: Optional parameters including:
             - session_id: Unique identifier for the user session
         audio     : list[str]     – file paths (one per slide, same order)
         images    : list[str|None] – file paths for slide images (one per slide, same order)
     """
+    return asyncio.run(generate_slideshow_with_audio_async(topic, api_key, **kwargs))
+def validate_topic(topic: str, api_key: str) -> bool:
     """Use Gemini Flash Preview to determine if a topic is suitable for a slideshow."""
+    client = genai.Client(api_key=api_key)
     system_prompt = f'''
 <role>
 You are SlideGenInputGuard, an AI assistant that determines if a user input is a suitable topic for a narrated slideshow presentation.
 </role>
 <instructions>
 Evaluate if "{topic}" is a real-world topic, question, or concept suitable for an educational slideshow. It is fine to include topics that are silly and not real-world topics.
 If it is a valid topic, respond with exactly: 1
 If it is nonsense, gibberish, meaningless, empty, or not a valid topic, respond with exactly: 0
 Only respond with a single digit: 1 or 0. No spaces, newlines or explanations. JUST THE NUMBER 1 OR 0.
 </instructions>
 <examples>
 Input:How does lightning form?
 Output:1
         config=types.GenerateContentConfig(response_mime_type="text/plain", temperature=0),
     )
     result = response.text.strip()
+    return result == "1"