manueljohnson063 committed
Commit 7f2cf1a · verified · 1 parent: 3d2fc68

Upload app.py

Files changed (1)
  1. app.py +3 -7
app.py CHANGED
@@ -13,9 +13,6 @@ model_id = "nvidia/canary-qwen-2.5b"
 print("Loading NVIDIA Canary-Qwen-2.5B model using NeMo...")
 model = SALM.from_pretrained(model_id)
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
-
 def generate_text(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
     """Generate text using the NVIDIA NeMo model (LLM mode)"""
 
@@ -31,8 +28,7 @@ def generate_text(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
         )
 
         # Convert IDs to text using model's tokenizer
-        # response = model.tokenizer.ids_to_text(answer_ids[0].cpu())
-        response = model.tokenizer.ids_to_text(answer_ids[0].to(device))
+        response = model.tokenizer.ids_to_text(answer_ids[0].cpu())
         return response
 
     except Exception as e:
@@ -54,8 +50,7 @@ def transcribe_audio(audio_file, user_prompt="Transcribe the following:"):
         )
 
         # Convert IDs to text
-        # transcript = model.tokenizer.ids_to_text(answer_ids[0].cpu())
-        transcript = model.tokenizer.ids_to_text(answer_ids[0].to(device))
+        transcript = model.tokenizer.ids_to_text(answer_ids[0].cpu())
         return transcript
 
     except Exception as e:
@@ -238,4 +233,5 @@ with gr.Blocks(title="NVIDIA Canary-Qwen-2.5B Chat") as demo:
     )
 
 if __name__ == "__main__":
+    demo.queue(api_open=True)
     demo.launch(share=True)
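For reference, the decoding pattern this commit settles on looks roughly like the sketch below. It is assembled from the lines visible in the diff plus the model card's SALM usage; the import path and the decode_answer helper are assumptions for illustration, not code taken from app.py.

# Sketch of the post-commit decode path (import path assumed from NeMo's
# speechlm2 collection; decode_answer is a hypothetical helper).
from nemo.collections.speechlm2.models import SALM

model = SALM.from_pretrained("nvidia/canary-qwen-2.5b")

def decode_answer(answer_ids):
    # Generated token IDs may live on the GPU; move them to CPU before
    # detokenizing, as the committed lines do, instead of .to(device).
    return model.tokenizer.ids_to_text(answer_ids[0].cpu())

With the manual model.to(device) removed, the model stays wherever SALM.from_pretrained loads it, and the only device-specific step left in the diff is the .cpu() call before ids_to_text.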
 
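The last hunk enables Gradio's request queue before launching. A minimal sketch of that launch pattern, with a placeholder handler and layout standing in for the app's actual generate/transcribe UI:

import gradio as gr

def respond(prompt):
    # Placeholder for the app's generate_text / transcribe_audio handlers.
    return prompt

with gr.Blocks(title="NVIDIA Canary-Qwen-2.5B Chat") as demo:
    prompt_box = gr.Textbox(label="Prompt")
    output_box = gr.Textbox(label="Response")
    prompt_box.submit(respond, prompt_box, output_box)

if __name__ == "__main__":
    demo.queue(api_open=True)  # enable the request queue; api_open=True also allows direct API calls
    demo.launch(share=True)    # public share link, unchanged from the original launch call

Queuing serializes concurrent requests, which helps when a single loaded model handles every generation on the Space.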