import os
import random

import gradio as gr
import wget
import whisper
from transformers import AutoModelForCausalLM, AutoTokenizer
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
# URLs of the GGUF models to download. Note: these files are saved to disk
# but never loaded anywhere in this script; the chat models are loaded
# separately through transformers in initialize_transformer_models().
model_urls = [
    "https://huggingface.co/leejet/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-q2_k.gguf",
    "https://huggingface.co/aifoundry-org/FLUX.1-schnell-Quantized/resolve/main/flux1-schnell-Q2_K.gguf",
    "https://huggingface.co/qwp4w3hyb/gemma-2-27b-it-iMat-GGUF/resolve/main/gemma-2-27b-it-imat-IQ1_S.gguf",
    "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf",
    "https://huggingface.co/WongBingbing/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF/resolve/main/meta-llama-3.1-8b-instruct-q2_k.gguf",
    "https://huggingface.co/city96/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-Q2_K.gguf",
    "https://huggingface.co/mradermacher/L3-Super-Nova-RP-8B-i1-GGUF/resolve/main/L3-Super-Nova-RP-8B.i1-IQ1_M.gguf",
    "https://huggingface.co/zhhan/Phi-3-mini-4k-instruct_gguf_derived/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
]
# Local filenames for the downloads. "flux1-schnell-Q2_K.gguf" appears twice
# (for two different URLs), so the second of those downloads is skipped by
# the existence check in download_models().
model_files = [
    "flux1-schnell-q2_k.gguf",
    "flux1-schnell-Q2_K.gguf",
    "gemma-2-27b-it-imat-IQ1_S.gguf",
    "llama-2-7b-chat.Q2_K.gguf",
    "meta-llama-3.1-8b-instruct-q2_k.gguf",
    "flux1-schnell-Q2_K.gguf",
    "L3-Super-Nova-RP-8B.i1-IQ1_M.gguf",
    "Phi-3-mini-4k-instruct-q4.gguf",
]
# Download the models with wget, skipping files that are already present.
def download_models(model_urls, model_files):
    for url, file in zip(model_urls, model_files):
        if not os.path.exists(file):
            wget.download(url, out=file)
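# A minimal usage sketch: downloading just the first model. The names come
# straight from the lists above; nothing else is assumed.
#
#     >>> download_models(model_urls[:1], model_files[:1])
#     >>> os.path.exists(model_files[0])
#     True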
# Initialize the Whisper transcription model.
def initialize_whisper():
    model = whisper.load_model("base")
    return model
# Initialize the transformers models.
def initialize_transformer_models():
    model_names = ["gpt2", "gpt2-medium", "gpt2-large"]  # You can add more models
    models = []
    for model_name in model_names:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        models.append((model, tokenizer))
    return models
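# A quick sketch of how the returned (model, tokenizer) pairs are used,
# mirroring unified_response() below; "Hello" is just a placeholder prompt.
#
#     >>> models = initialize_transformer_models()
#     >>> model, tokenizer = models[0]  # the plain gpt2 pair
#     >>> ids = tokenizer("Hello", return_tensors="pt")
#     >>> text = tokenizer.decode(model.generate(**ids, max_new_tokens=20)[0])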
# Generate a song with MusicGen.
def generate_song(prompt, model_type="standard"):
    if model_type == "medium":
        model = MusicGen.get_pretrained("facebook/musicgen-medium")
    else:
        model = MusicGen.get_pretrained("facebook/musicgen-melody")
    model.set_generation_params(duration=30)  # Song length in seconds
    # generate() takes a list of text descriptions and returns a batch of
    # waveforms of shape [batch, channels, samples].
    wav_output = model.generate([prompt])
    # MusicGen has no save_wav() method; audiocraft's audio_write() saves the
    # waveform, appending the .wav extension to the stem itself.
    song_path = "generated_song"
    audio_write(song_path, wav_output[0].cpu(), model.sample_rate, strategy="loudness")
    return song_path + ".wav"
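# Example call, assuming audiocraft is installed (the first call downloads
# the MusicGen weights from the Hugging Face Hub, which takes a while):
#
#     >>> path = generate_song("upbeat acoustic guitar", model_type="medium")
#     >>> path
#     'generated_song.wav'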
# Transcribe audio with Whisper.
def transcribe_audio(audio_path, whisper_model):
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
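# Example, assuming a local file "sample.wav" exists (the filename is
# illustrative only):
#
#     >>> wm = initialize_whisper()
#     >>> transcribe_audio("sample.wav", wm)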
# Combine the answers from the different models into a single response.
def unified_response(user_input, models):
    responses = []
    for model, tokenizer in models:
        inputs = tokenizer(user_input, return_tensors="pt")
        # Bound the generation and silence the missing-pad-token warning.
        outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        responses.append(response)
    # Combine the responses (you can apply more logic here, such as picking
    # the most common one).
    final_response = random.choice(responses)
    return final_response
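# Note that random.choice() just picks one model's output at random rather
# than truly "unifying" them. A simple alternative, not in the original,
# would be majority voting over the responses list:
#
#     >>> from collections import Counter
#     >>> final = Counter(responses).most_common(1)[0][0]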
# Chatbot logic behind the Gradio interface.
def chatbot_response(user_input, models, whisper_model=None, audio_path=None):
    if user_input.lower() == "exit":
        return "Session ended."
    if "image" in user_input.lower():
        return "Image generation is not supported by these models."
    elif "song" in user_input.lower() or "music" in user_input.lower():
        model_type = "medium" if "medium" in user_input.lower() else "standard"
        song_path = generate_song(user_input, model_type=model_type)
        return song_path  # Return the path of the generated song
    elif audio_path:  # If an audio file was provided, transcribe it
        return transcribe_audio(audio_path, whisper_model)
    else:
        return unified_response(user_input, models)
# Create the Gradio interface.
def create_gradio_interface(models, whisper_model):
    def gradio_chat(user_input, audio_input=None):
        response = chatbot_response(user_input, models, whisper_model, audio_input)
        if isinstance(response, str) and response.endswith(".png"):
            return None, response, None, None  # Image path goes to the image output
        elif isinstance(response, str) and response.endswith(".wav"):
            return None, None, response, None  # Song path goes to the audio output
        else:
            return response, None, None, None  # Plain text (including transcriptions)

    # One text input plus one audio input, and four outputs (text, image,
    # song, transcription). type="filepath" makes Gradio pass the upload to
    # Whisper as a path rather than a (sample_rate, array) tuple.
    iface = gr.Interface(
        fn=gradio_chat,
        inputs=[gr.Textbox(label="Message"), gr.Audio(type="filepath", label="Audio")],
        outputs=["text", "image", "audio", "text"],
        title="Chatbot with Images, Songs, and Audio Transcription",
    )
    return iface
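# launch() starts a local server by default; on Hugging Face Spaces no extra
# arguments are needed. For a temporary public link from a local machine you
# could pass share=True:
#
#     >>> create_gradio_interface(models, whisper_model).launch(share=True)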
# Run the chatbot with Gradio.
def run_chatbot_with_gradio():
    download_models(model_urls, model_files)  # Download the models if they are missing
    models = initialize_transformer_models()  # Initialize the Transformers models
    whisper_model = initialize_whisper()  # Initialize the Whisper model
    iface = create_gradio_interface(models, whisper_model)
    iface.launch()

if __name__ == "__main__":
    run_chatbot_with_gradio()