File size: 2,673 Bytes
b653f91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
An interactive script to translate text to English using a fine-tuned NLLB model.
"""

import torch
from transformers import M2M100ForConditionalGeneration, NllbTokenizer

# --- 1. Configuration ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SUPPORTED_LANGUAGES = {
    "nepali": "nep_Npan",
    "sinhala": "sin_Sinh",
}

# --- 2. Load Model and Tokenizer ---
def load_model_and_tokenizer(model_path):
    """Loads the model and tokenizer from the given path."""
    print(f"Loading model on {DEVICE.upper()}...")
    try:
        model = M2M100ForConditionalGeneration.from_pretrained(model_path).to(DEVICE)
        tokenizer = NllbTokenizer.from_pretrained(model_path)
        print("Model and tokenizer loaded successfully!")
        return model, tokenizer
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

# --- 3. Translation Function ---
def translate_text(model, tokenizer, text: str, src_lang: str) -> str:
    """
    Translates a single string of text to English.
    """
    if src_lang not in SUPPORTED_LANGUAGES:
        return f"Language '{src_lang}' not supported."

    tokenizer.src_lang = SUPPORTED_LANGUAGES[src_lang]
    inputs = tokenizer(text, return_tensors="pt").to(DEVICE)

    generated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"),
        max_length=128,
    )

    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

# --- 4. Interactive Translation Loop ---
if __name__ == "__main__":
    # Select model path based on language
    lang_choice = input(f"Choose a language ({list(SUPPORTED_LANGUAGES.keys())}): ").lower()
    if lang_choice not in SUPPORTED_LANGUAGES:
        print("Invalid language choice.")
        exit()

    # For now, we assume a single model path. This can be extended.
    model_path = "models/nllb-finetuned-nepali-en"
    model, tokenizer = load_model_and_tokenizer(model_path)

    if model and tokenizer:
        print(f"\n--- Interactive Translation ({lang_choice.capitalize()}) ---")
        print(f"Enter a {lang_choice} sentence to translate to English.")
        print("Type 'exit' to quit.\n")

        while True:
            text_to_translate = input(f"{lang_choice.capitalize()}: ")
            if text_to_translate.lower() == "exit":
                break

            if not text_to_translate.strip():
                print("Please enter some text to translate.")
                continue

            english_translation = translate_text(model, tokenizer, text_to_translate, lang_choice)
            print(f"English: {english_translation}\n")