import os
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Supported language options
LANGUAGES = {
    "Auto Detect": "auto",
    "English": "en",
    "Chinese": "zh",
    "Russian": "ru",
    "Japanese": "ja",
    "Korean": "ko",
    "Spanish": "es",
    "French": "fr",
    "Portuguese": "pt",
    "German": "de",
    "Italian": "it",
    "Thai": "th",
    "Vietnamese": "vi",
    "Indonesian": "id",
    "Malay": "ms",
    "Arabic": "ar",
    "Polish": "pl",
    "Dutch": "nl",
    "Romanian": "ro",
    "Turkish": "tr",
    "Czech": "cs",
    "Danish": "da",
    "Finnish": "fi",
    "Ukrainian": "uk",
    "Norwegian Bokmal": "nb",
    "Norwegian": "no",
    "Croatian": "hr",
    "Swedish": "sv",
    "Hungarian": "hu"
}
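
# The two-letter values above are ISO 639-1 language codes; prompting() below
# appends the target-language code to the prompt as a tag, e.g. "<en>".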

device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"

print("Starting model download")

def load_model():
    # Load the 7B model in bfloat16 and move it to the GPU.
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16).to(device)
    print(f"Model loaded on {device}")
    return model


model = load_model()
print("Finished model download")

# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def prompting(text, source_lang, target_lang):
    # Seed-X expects the target-language tag (e.g. "<en>") appended after the text.
    target_code = LANGUAGES[target_lang]
    if LANGUAGES[source_lang] == "auto":
        # Source language unknown: let the model detect it.
        prompt = f"Translate the following sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    else:
        prompt = f"Translate the following {source_lang} sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    return prompt
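
# `spaces` is imported above; on a ZeroGPU Space the inference function is
# typically wrapped in the `spaces.GPU` decorator so that a GPU is attached for
# the duration of each call (assumed here, since nothing else uses that import).
@spaces.GPU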
def translate_text(text, source_lang, target_lang):
    if not text.strip():
        # This is a generator function, so results must be yielded, not returned.
        yield "Please enter text to translate."
        return
    try:
        prompt = prompting(text, source_lang, target_lang)
        print(prompt)
        # Tokenize the prompt and move it to the same device as the model.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        translated_chunk = model.generate(
            input_ids=input_ids,
            max_length=512,
            num_beams=4,
            num_return_sequences=1,
        )
        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
        # The decoded text echoes the prompt; strip it so only the translation remains.
        full_output = full_output.replace(prompt.strip(), "")
        yield full_output
    except Exception as e:
        yield f"Translation error: {str(e)}"

# Build the Gradio UI
with gr.Blocks(title="Seed-X") as demo:
    gr.Markdown("# 👋 Seed-X, powered by ByteDance")
    gr.Markdown(
        "A real-time translation tool based on Seed-X, a model that pushes the boundaries of translation capability within 7 billion parameters."
    )
    with gr.Column():
        with gr.Row():
            source_lang = gr.Dropdown(
                choices=list(LANGUAGES.keys()),
                value="Auto Detect",
                label="Source Language"
            )
            target_lang = gr.Dropdown(
                choices=list(LANGUAGES.keys())[1:],  # Exclude "Auto Detect"
                value="English",
                label="Target Language"
            )
        with gr.Row():
            translate_btn = gr.Button("Translate", variant="secondary")
        with gr.Row():
            source_text = gr.Textbox(
                label="Input Text",
                placeholder="Please enter the text to translate...",
                lines=5
            )
            target_text = gr.Textbox(
                label="Translation Result",
                interactive=False,
                lines=5
            )
    gr.Markdown(
        "(Input and output are limited to no more than 5 lines each.)"
    )
    # Examples
    gr.Examples(
        examples=[
            ["我说一句你说一车啊", "Chinese", "English"],
            ["离谱她妈给离谱开门,离谱到家了", "Chinese", "English"],
            ["雨女无瓜", "Chinese", "English"],
            ["Their relationship is a total situationship.", "English", "Chinese"]
        ],
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text,
        fn=translate_text,
        cache_examples=True
    )
    # Run translation when the button is clicked
    translate_btn.click(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    # Also translate when Enter is pressed in the input box
    source_text.submit(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    gr.Markdown(
        "🌐[GitHub](https://github.com/ByteDance-Seed/Seed-X-7B) 📄[Report](https://arxiv.org/pdf/2507.13618) 🤗[Model](https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543)"
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
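
# Expected runtime dependencies for this Space, based on the imports above
# (a sketch; exact package versions are an assumption):
#   gradio, torch, transformers, and the Hugging Face `spaces` package.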