import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import re
import spaces  # Import spaces for ZeroGPU compatibility

# Load the models and tokenizers for each translation direction
# Faroese to English
model_faero_eng = AutoModelForSeq2SeqLM.from_pretrained("barbaroo/nllb_200_600M_fo_en")
tokenizer_faero_eng = AutoTokenizer.from_pretrained("barbaroo/nllb_200_600M_fo_en", src_lang="fao_Latn")

# English to Faroese
model_eng_faero = AutoModelForSeq2SeqLM.from_pretrained("barbaroo/nllb_200_1.3B_en_fo")
tokenizer_eng_faero = AutoTokenizer.from_pretrained("barbaroo/nllb_200_1.3B_en_fo", src_lang="eng_Latn")

# Check if a GPU is available and move both models to it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_faero_eng.to(device)
model_eng_faero.to(device)

# Function to split text into sentences based on simple punctuation
def chunk_text_simple(text, max_length, tokenizer):
    # Split by punctuation (period, question mark, or exclamation mark)
    sentences = re.split(r'(?<=[.!?])\s+', text)
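    # --- Sketch completing the truncated function body (an assumption, not the
    # --- original implementation): greedily group sentences into chunks whose
    # --- token count, as measured by the given tokenizer, stays within max_length.
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        candidate = f"{current_chunk} {sentence}".strip() if current_chunk else sentence
        if len(tokenizer.encode(candidate)) <= max_length:
            # The sentence still fits; keep accumulating into the current chunk
            current_chunk = candidate
        else:
            # Close off the current chunk and start a new one with this sentence
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks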