import argparse

import tensorflow as tf

from translator import Translator
from utils import tokenizer_utils
from utils.preprocessing import input_processing, output_processing
from models.transformer import Transformer
from models.encoder import Encoder
from models.decoder import Decoder
from models.layers import (
    EncoderLayer,
    DecoderLayer,
    MultiHeadAttention,
    point_wise_feed_forward_network,
)
from models.utils import masked_loss, masked_accuracy


def main(sentences: list, model: tf.keras.Model, en_tokenizer, vi_tokenizer) -> None:
    """
    Translates input English sentences to Vietnamese using a pre-trained model.

    Args:
        sentences (list): List of English sentences to translate.
        model (tf.keras.Model): The pre-trained translation model.
        en_tokenizer: English tokenizer.
        vi_tokenizer: Vietnamese tokenizer.
    """
    translator = Translator(en_tokenizer, vi_tokenizer, model)

    for sentence in sentences:
        # Preprocess the raw sentence, translate it, then post-process the output.
        processed_sentence = input_processing(sentence)
        translated_text = translator(processed_sentence)
        translated_text = output_processing(translated_text)

        print("Input:", processed_sentence)
        print("Translated:", translated_text)
        print("-" * 50)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Translate English sentences to Vietnamese using a pre-trained transformer model.",
        epilog="Example: python translate.py --sentence 'Hello, world!' 'The sun is shining.'"
    )
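
    # --sentence takes one or more sentences after a single flag (nargs="*");
    # when omitted, the example sentence below is translated instead.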
    parser.add_argument(
        "--sentence",
        type=str,
        nargs="*",
        default=[
            (
                "For at least six centuries, residents along a lake in the mountains of central Japan "
                "have marked the depth of winter by celebrating the return of a natural phenomenon "
                "once revered as the trail of a wandering god."
            )
        ],
        help="One or more English sentences to translate (default: provided example sentence)"
    )
    parser.add_argument(
        "--model_path",
        type=str,
        default="ckpts/en_vi_translation.keras",
        help="Path to the pre-trained model file (default: ckpts/en_vi_translation.keras)"
    )

    args = parser.parse_args()
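
    # load_model needs the custom layer classes and loss/metric functions
    # registered by name so it can rebuild the serialized Transformer.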
    custom_objects = {
        "Transformer": Transformer,
        "Encoder": Encoder,
        "Decoder": Decoder,
        "EncoderLayer": EncoderLayer,
        "DecoderLayer": DecoderLayer,
        "MultiHeadAttention": MultiHeadAttention,
        "point_wise_feed_forward_network": point_wise_feed_forward_network,
        "masked_loss": masked_loss,
        "masked_accuracy": masked_accuracy,
    }

    print("Loading model from:", args.model_path)
    loaded_model = tf.keras.models.load_model(
        args.model_path, custom_objects=custom_objects
    )
    print("Model loaded successfully.")
    en_tokenizer, vi_tokenizer = tokenizer_utils.load_tokenizers()

    main(sentences=args.sentence, model=loaded_model, en_tokenizer=en_tokenizer, vi_tokenizer=vi_tokenizer)
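
    # Interactive mode: keep prompting for new sentences until the user quits.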
    while True:
        choice = input("Would you like to translate another sentence? (Y/n): ").strip().lower()
        if choice in ['no', 'n', 'quit', 'q']:
            print("Exiting the program.")
            break
        elif choice in ['yes', 'y', '']:
            # Empty input counts as "yes", matching the (Y/n) default in the prompt.
            new_sentence = input("Enter an English sentence to translate: ").strip()
            if new_sentence:
                main(sentences=[new_sentence], model=loaded_model, en_tokenizer=en_tokenizer, vi_tokenizer=vi_tokenizer)
            else:
                print("No sentence provided. Please try again.")
        else:
            print("Invalid input. Please enter 'y' or 'n'.")