Spaces:
Paused
Paused
import io
import json

import outetts
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse, JSONResponse
# Build the TTS interface once, at import time; the handlers below use this
# module-level object.
interface = outetts.Interface(
    config=outetts.ModelConfig.auto_config(
        model=outetts.Models.VERSION_1_0_SIZE_1B,
        # Active backend: Hugging Face transformers.
        backend=outetts.Backend.HF,
        # Alternative (llama.cpp) -- swap the backend line above for:
        #   backend=outetts.Backend.LLAMACPP,
        #   quantization=outetts.LlamaCppQuantization.FP16
    )
)

# Default speaker profile passed to every generation call.
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# ASGI application object.
app = FastAPI()
def greet_json():
    """Return the fixed greeting payload as a plain dictionary."""
    # NOTE(review): no route decorator is visible for this function in this
    # view of the file -- confirm how (or whether) it is registered on `app`.
    payload = {"Hello": "World!"}
    return payload
def _synthesize_audio(text):
    """Generate speech for ``text`` and return the saved audio as bytes.

    Uses the module-level ``interface`` and ``speaker`` with chunked
    generation and a sampling temperature of 0.4.
    """
    output = interface.generate(
        config=outetts.GenerationConfig(
            text=text,
            generation_type=outetts.GenerationType.CHUNKED,
            speaker=speaker,
            sampler_config=outetts.SamplerConfig(
                temperature=0.4
            ),
        )
    )
    # Write the audio into an in-memory buffer instead of a file on disk.
    # NOTE(review): assumes output.save() accepts a file-like object and not
    # only a filesystem path -- confirm against the installed outetts version.
    audio_buffer = io.BytesIO()
    output.save(audio_buffer)
    return audio_buffer.getvalue()


async def websocket_tts(websocket: WebSocket):
    """Serve text-to-speech over a WebSocket connection.

    Accepts the connection, then loops: receive one text message from the
    client, synthesize audio for it, and send the audio back as a single
    binary message. The loop ends when the client disconnects.

    Requires ``WebSocket`` and ``WebSocketDisconnect`` to be imported from
    ``fastapi`` at module level.

    Args:
        websocket: The incoming WebSocket connection.
    """
    await websocket.accept()
    try:
        while True:
            # Receive a text chunk from the client.
            text = await websocket.receive_text()
            # NOTE(review): synthesis is a synchronous call inside this
            # coroutine, so the event loop is blocked while it runs.
            # Consider offloading it to a worker thread if several clients
            # must be served concurrently (confirm the model is thread-safe
            # first).
            audio_bytes = _synthesize_audio(text)
            # Send the audio data back as bytes.
            await websocket.send_bytes(audio_bytes)
    except WebSocketDisconnect:
        # The client closed the connection; there is nothing to clean up.
        pass
| ''' | |
| @app.post("/tts") | |
| async def tts_endpoint(request: Request): | |
| """ | |
| Accepts JSON {"text": "..."} and streams the generated audio as WAV. | |
| """ | |
| try: | |
| data = await request.json() | |
| text = data.get("text") | |
| if not text: | |
| return JSONResponse({"error": "Missing 'text' in request"}, status_code=400) | |
| # Generate audio from text | |
| output = interface.generate( | |
| config=outetts.GenerationConfig( | |
| text=text, | |
| generation_type=outetts.GenerationType.CHUNKED, | |
| speaker=speaker, | |
| sampler_config=outetts.SamplerConfig( | |
| temperature=0.4 | |
| ), | |
| ) | |
| ) | |
| audio_buffer = io.BytesIO() | |
| output.save(audio_buffer) | |
| audio_buffer.seek(0) | |
| def audio_stream(): | |
| yield audio_buffer.read() | |
| return StreamingResponse(audio_stream(), media_type="audio/wav") | |
| except Exception as e: | |
| return JSONResponse({"error": str(e)}, status_code=500) | |
| ''' | |
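# WebSocket endpoint removed; use POST /tts for TTS requests.
# NOTE(review): the comment above looks stale. `websocket_tts` is still
# defined in this file, and the POST /tts handler is disabled (it sits inside
# the triple-quoted string literal above). Confirm which endpoint is intended.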