Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import re
|
|
| 4 |
import gradio as gr
|
| 5 |
import asyncio
|
| 6 |
import logging
|
| 7 |
-
import
|
| 8 |
from serpapi import GoogleSearch
|
| 9 |
from pydantic import BaseModel
|
| 10 |
from autogen_agentchat.agents import AssistantAgent
|
|
@@ -20,6 +20,7 @@ import traceback
|
|
| 20 |
import soundfile as sf
|
| 21 |
import tempfile
|
| 22 |
from pydub import AudioSegment
|
|
|
|
| 23 |
|
| 24 |
# Set up logging
|
| 25 |
logging.basicConfig(
|
|
@@ -35,7 +36,16 @@ logger = logging.getLogger(__name__)
|
|
| 35 |
# Set up environment
|
| 36 |
OUTPUT_DIR = "outputs"
|
| 37 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Define Pydantic model for slide data
|
| 41 |
class Slide(BaseModel):
|
|
@@ -172,34 +182,17 @@ async def validate_and_convert_speaker_audio(speaker_audio):
|
|
| 172 |
logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
|
| 173 |
return None
|
| 174 |
|
| 175 |
-
# Helper function to generate audio using
|
| 176 |
def generate_xtts_audio(text, speaker_wav, output_path):
|
|
|
|
|
|
|
|
|
|
| 177 |
try:
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
"--model_name", "tts_models/multilingual/multi-dataset/xtts_v2",
|
| 181 |
-
"--encoder_path", "model_se.pth.tar",
|
| 182 |
-
"--encoder_config", "config_se.json",
|
| 183 |
-
"--speaker_wav", speaker_wav,
|
| 184 |
-
"--text", text,
|
| 185 |
-
"--out_path", output_path,
|
| 186 |
-
"--language_idx", "en"
|
| 187 |
-
]
|
| 188 |
-
logger.debug("Executing tts command: %s", " ".join(cmd))
|
| 189 |
-
result = subprocess.run(
|
| 190 |
-
cmd,
|
| 191 |
-
capture_output=True,
|
| 192 |
-
text=True,
|
| 193 |
-
input="y\n", # Automatically provide 'y' to any download prompt
|
| 194 |
-
check=True
|
| 195 |
-
)
|
| 196 |
-
logger.info("tts command succeeded for %s: %s", output_path, result.stdout)
|
| 197 |
return True
|
| 198 |
-
except subprocess.CalledProcessError as e:
|
| 199 |
-
logger.error("tts command failed for %s: %s\n%s", output_path, e.stderr, e.stdout)
|
| 200 |
-
return False
|
| 201 |
except Exception as e:
|
| 202 |
-
logger.error("
|
| 203 |
return False
|
| 204 |
|
| 205 |
# Helper function to extract JSON from messages
|
|
@@ -288,9 +281,19 @@ def extract_json_from_message(message):
|
|
| 288 |
# Function to generate Markdown and convert to PDF (landscape, centered)
|
| 289 |
def generate_slides_pdf(slides):
|
| 290 |
pdf = MarkdownPdf()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
for slide in slides:
|
| 292 |
content_lines = slide['content'].replace('\n', '\n\n')
|
| 293 |
markdown_content = f"""
|
|
|
|
|
|
|
| 294 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
|
| 295 |
# {slide['title']}
|
| 296 |
|
|
@@ -300,6 +303,8 @@ def generate_slides_pdf(slides):
|
|
| 300 |
{content_lines}
|
| 301 |
</div>
|
| 302 |
|
|
|
|
|
|
|
| 303 |
---
|
| 304 |
"""
|
| 305 |
pdf.add_section(Section(markdown_content, toc=False))
|
|
@@ -328,6 +333,15 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
| 328 |
"""
|
| 329 |
return
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
model_client = get_model_client(api_service, api_key)
|
| 332 |
|
| 333 |
research_agent = AssistantAgent(
|
|
@@ -564,8 +578,7 @@ Example for 1 content slide:
|
|
| 564 |
|
| 565 |
# Generate PDF from slides
|
| 566 |
pdf_file = generate_slides_pdf(slides)
|
| 567 |
-
pdf_path = os.path.
|
| 568 |
-
print(f"PDF file generated: {pdf_file}")
|
| 569 |
|
| 570 |
audio_files = []
|
| 571 |
speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
|
|
@@ -580,22 +593,6 @@ Example for 1 content slide:
|
|
| 580 |
"""
|
| 581 |
return
|
| 582 |
|
| 583 |
-
# Verify XTTS-v2 model files
|
| 584 |
-
# required_files = [
|
| 585 |
-
# os.path.join(XTTS_MODEL_DIR, "model_se.pth.tar"),
|
| 586 |
-
# os.path.join(XTTS_MODEL_DIR, "config_se.json")
|
| 587 |
-
# ]
|
| 588 |
-
# for f in required_files:
|
| 589 |
-
# if not os.path.exists(f):
|
| 590 |
-
# logger.error("Missing XTTS-v2 model file: %s", f)
|
| 591 |
-
# yield f"""
|
| 592 |
-
# <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
| 593 |
-
# <h2 style="color: #d9534f;">Missing XTTS-v2 model files</h2>
|
| 594 |
-
# <p style="margin-top: 20px;">Please ensure XTTS-v2 is downloaded to {XTTS_MODEL_DIR} and try again.</p>
|
| 595 |
-
# </div>
|
| 596 |
-
# """
|
| 597 |
-
# return
|
| 598 |
-
|
| 599 |
# Process audio generation sequentially with retries
|
| 600 |
for i, script in enumerate(scripts):
|
| 601 |
cleaned_script = clean_script_text(script)
|
|
@@ -627,7 +624,7 @@ Example for 1 content slide:
|
|
| 627 |
|
| 628 |
success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
|
| 629 |
if not success:
|
| 630 |
-
raise RuntimeError("
|
| 631 |
|
| 632 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 633 |
audio_files.append(audio_file)
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import asyncio
|
| 6 |
import logging
|
| 7 |
+
import torch
|
| 8 |
from serpapi import GoogleSearch
|
| 9 |
from pydantic import BaseModel
|
| 10 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
|
| 20 |
import soundfile as sf
|
| 21 |
import tempfile
|
| 22 |
from pydub import AudioSegment
|
| 23 |
+
from TTS.api import TTS
|
| 24 |
|
| 25 |
# Set up logging
|
| 26 |
logging.basicConfig(
|
|
|
|
| 36 |
# Set up environment
|
| 37 |
OUTPUT_DIR = "outputs"
|
| 38 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 39 |
+
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 40 |
+
|
| 41 |
+
# Initialize TTS model
|
| 42 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 43 |
+
try:
|
| 44 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
| 45 |
+
logger.info("TTS model initialized on %s", device)
|
| 46 |
+
except Exception as e:
|
| 47 |
+
logger.error("Failed to initialize TTS model: %s", str(e))
|
| 48 |
+
tts = None
|
| 49 |
|
| 50 |
# Define Pydantic model for slide data
|
| 51 |
class Slide(BaseModel):
|
|
|
|
| 182 |
logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
|
| 183 |
return None
|
| 184 |
|
| 185 |
+
# Helper function to generate audio using Coqui TTS API
|
| 186 |
def generate_xtts_audio(text, speaker_wav, output_path):
|
| 187 |
+
if not tts:
|
| 188 |
+
logger.error("TTS model not initialized")
|
| 189 |
+
return False
|
| 190 |
try:
|
| 191 |
+
tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
|
| 192 |
+
logger.info("Generated audio for %s", output_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
return True
|
|
|
|
|
|
|
|
|
|
| 194 |
except Exception as e:
|
| 195 |
+
logger.error("Failed to generate audio for %s: %s", output_path, str(e))
|
| 196 |
return False
|
| 197 |
|
| 198 |
# Helper function to extract JSON from messages
|
|
|
|
| 281 |
# Function to generate Markdown and convert to PDF (landscape, centered)
|
| 282 |
def generate_slides_pdf(slides):
|
| 283 |
pdf = MarkdownPdf()
|
| 284 |
+
# Add LaTeX preamble for landscape orientation
|
| 285 |
+
preamble = r"""
|
| 286 |
+
\usepackage{pdflscape}
|
| 287 |
+
\newcommand{\blandscape}{\begin{landscape}}
|
| 288 |
+
\newcommand{\elandscape}{\end{landscape}}
|
| 289 |
+
"""
|
| 290 |
+
pdf.set_preamble(preamble)
|
| 291 |
+
|
| 292 |
for slide in slides:
|
| 293 |
content_lines = slide['content'].replace('\n', '\n\n')
|
| 294 |
markdown_content = f"""
|
| 295 |
+
\\blandscape
|
| 296 |
+
|
| 297 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
|
| 298 |
# {slide['title']}
|
| 299 |
|
|
|
|
| 303 |
{content_lines}
|
| 304 |
</div>
|
| 305 |
|
| 306 |
+
\\elandscape
|
| 307 |
+
|
| 308 |
---
|
| 309 |
"""
|
| 310 |
pdf.add_section(Section(markdown_content, toc=False))
|
|
|
|
| 333 |
"""
|
| 334 |
return
|
| 335 |
|
| 336 |
+
if not tts:
|
| 337 |
+
yield f"""
|
| 338 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
| 339 |
+
<h2 style="color: #d9534f;">TTS model not initialized</h2>
|
| 340 |
+
<p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
|
| 341 |
+
</div>
|
| 342 |
+
"""
|
| 343 |
+
return
|
| 344 |
+
|
| 345 |
model_client = get_model_client(api_service, api_key)
|
| 346 |
|
| 347 |
research_agent = AssistantAgent(
|
|
|
|
| 578 |
|
| 579 |
# Generate PDF from slides
|
| 580 |
pdf_file = generate_slides_pdf(slides)
|
| 581 |
+
pdf_path = f"file://{os.path.abspath(pdf_file)}"
|
|
|
|
| 582 |
|
| 583 |
audio_files = []
|
| 584 |
speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
|
|
|
|
| 593 |
"""
|
| 594 |
return
|
| 595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
# Process audio generation sequentially with retries
|
| 597 |
for i, script in enumerate(scripts):
|
| 598 |
cleaned_script = clean_script_text(script)
|
|
|
|
| 624 |
|
| 625 |
success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
|
| 626 |
if not success:
|
| 627 |
+
raise RuntimeError("TTS generation failed")
|
| 628 |
|
| 629 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 630 |
audio_files.append(audio_file)
|