import os
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Supported language options
LANGUAGES = {
    "Auto Detect": "auto",
    "English": "en",
    "Chinese": "zh",
    "Russian": "ru",
    "Japanese": "ja",
    "Korean": "ko",
    "Spanish": "es",
    "French": "fr",
    "Portuguese": "pt",
    "German": "de",
    "Italian": "it",
    "Thai": "th",
    "Vietnamese": "vi",
    "Indonesian": "id",
    "Malay": "ms",
    "Arabic": "ar",
    "Polish": "pl",
    "Dutch": "nl",
    "Romanian": "ro",
    "Turkish": "tr",
    "Czech": "cs",
    "Danish": "da",
    "Finnish": "fi",
    "Ukrainian": "uk",
    "Norwegian Bokmal": "nb",
    "Norwegian": "no",
    "Croatian": "hr",
    "Swedish": "sv",
    "Hungarian": "hu"
}
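
# The two-letter values above are ISO 639-1 language codes; prompting() below
# appends the target-language code to the prompt as a tag, e.g. "<en>".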

device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"

print("Starting model download")

def load_model():
    # Load the 7B model in bfloat16 and move it to the GPU.
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16).to(device)
    print(f"Model loaded on {device}")
    return model


model = load_model()
print("Finished model download")

# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def prompting(text, source_lang, target_lang):
    # Seed-X expects the target-language tag (e.g. "<en>") appended after the text.
    target_code = LANGUAGES[target_lang]
    if LANGUAGES[source_lang] == "auto":
        # Source language unknown: let the model detect it.
        prompt = f"Translate the following sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    else:
        prompt = f"Translate the following {source_lang} sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    return prompt
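
# `spaces` is imported above; on a ZeroGPU Space the inference function is
# typically wrapped in the `spaces.GPU` decorator so that a GPU is attached for
# the duration of each call (assumed here, since nothing else uses that import).
@spaces.GPU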
def translate_text(text, source_lang, target_lang):
    if not text.strip():
        # This is a generator function, so results must be yielded, not returned.
        yield "Please enter text to translate."
        return
    try:
        prompt = prompting(text, source_lang, target_lang)
        print(prompt)
        # Tokenize the prompt and move it to the same device as the model.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        translated_chunk = model.generate(
            input_ids=input_ids,
            max_length=512,
            num_beams=4,
            num_return_sequences=1,
        )
        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
        # The decoded text echoes the prompt; strip it so only the translation remains.
        full_output = full_output.replace(prompt.strip(), "")
        yield full_output
    except Exception as e:
        yield f"Translation error: {str(e)}"

# Build the Gradio UI
with gr.Blocks(title="Seed-X") as demo:
    gr.Markdown("# 👋 Seed-X, powered by ByteDance")
    gr.Markdown(
        "A real-time translation tool based on Seed-X, a model that pushes the boundaries of translation capability within 7 billion parameters."
    )
    with gr.Column():
        with gr.Row():
            source_lang = gr.Dropdown(
                choices=list(LANGUAGES.keys()),
                value="Auto Detect",
                label="Source Language"
            )
            target_lang = gr.Dropdown(
                choices=list(LANGUAGES.keys())[1:],  # Exclude "Auto Detect"
                value="English",
                label="Target Language"
            )
        with gr.Row():
            translate_btn = gr.Button("Translate", variant="secondary")
        with gr.Row():
            source_text = gr.Textbox(
                label="Input Text",
                placeholder="Please enter the text to translate...",
                lines=5
            )
            target_text = gr.Textbox(
                label="Translation Result",
                interactive=False,
                lines=5
            )
    gr.Markdown(
        "(Input and output are limited to no more than 5 lines each.)"
    )
    # Examples
    gr.Examples(
        examples=[
            ["我说一句你说一车啊", "Chinese", "English"],
            ["离谱她妈给离谱开门,离谱到家了", "Chinese", "English"],
            ["雨女无瓜", "Chinese", "English"],
            ["Their relationship is a total situationship.", "English", "Chinese"]
        ],
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text,
        fn=translate_text,
        cache_examples=True
    )
    # Run translation when the button is clicked
    translate_btn.click(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    # Also translate when Enter is pressed in the input box
    source_text.submit(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    gr.Markdown(
        "🌐[GitHub](https://github.com/ByteDance-Seed/Seed-X-7B) 📄[Report](https://arxiv.org/pdf/2507.13618) 🤗[Model](https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543)"
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
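
# Expected runtime dependencies for this Space, based on the imports above
# (a sketch; exact package versions are an assumption):
#   gradio, torch, transformers, and the Hugging Face `spaces` package.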