import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    pipeline,
    TapexTokenizer,
    BartForConditionalGeneration,
)
import pandas as pd
import json
# model_tapex = "microsoft/tapex-large-finetuned-wtq"
# tokenizer_tapex = AutoTokenizer.from_pretrained(model_tapex)
# model_tapex = AutoModelForSeq2SeqLM.from_pretrained(model_tapex)
# pipe_tapex = pipeline(
#     "table-question-answering", model=model_tapex, tokenizer=tokenizer_tapex
# )

# New: load TAPEX directly with its dedicated tokenizer and BART model
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
# model_tapas = "google/tapas-large-finetuned-wtq"
# tokenizer_tapas = AutoTokenizer.from_pretrained(model_tapas)
# model_tapas = AutoModelForTableQuestionAnswering.from_pretrained(model_tapas)
# pipe_tapas = pipeline(
#     "table-question-answering", model=model_tapas, tokenizer=tokenizer_tapas
# )

# New: load both TAPAS checkpoints through the high-level pipeline API
pipe_tapas = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
pipe_tapas2 = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wikisql-supervised")
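# Illustrative sketch (assumption, not part of the original Space): a table-question-answering
# pipeline call returns a dict with keys such as 'answer', 'coordinates', 'cells', and 'aggregator',
# which is why process2 below reads the first entry of result['cells']. For example:
# sample_table = pd.DataFrame({"Actors": ["Brad Pitt"], "Number of movies": ["87"]})
# pipe_tapas(table=sample_table, query="How many movies has Brad Pitt played in?")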
def process2(query, csv_dataStr):
    # Example payload: {"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], "Number of movies": ["87", "53", "69"]}
    csv_data = json.loads(csv_dataStr)
    table = pd.DataFrame.from_dict(csv_data)
    # Microsoft TAPEX: encode the table + question, generate, and decode the answer
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Google TAPAS (WTQ)
    result_tapas = pipe_tapas(table=table, query=query)['cells'][0]
    # Google TAPAS (WikiSQL-supervised)
    result_tapas2 = pipe_tapas2(table=table, query=query)['cells'][0]
    return result_tapex, result_tapas, result_tapas2
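# Illustrative usage sketch (assumption, not part of the original Space): calling process2
# directly with the sample table from the comment above. Left commented out so the models
# are not invoked at import time.
# sample_query = "How many movies does Leonardo Di Caprio have?"
# sample_data = json.dumps({"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
#                           "Number of movies": ["87", "53", "69"]})
# tapex_answer, tapas_answer, tapas2_answer = process2(sample_query, sample_data)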
# Inputs
query_text = gr.Text(label="Question")
# input_file = gr.File(label="Upload a CSV file", type="file")
input_data = gr.Text(label="Table data (JSON)")
# rows_slider = gr.Slider(label="Number of rows")
# Outputs
answer_text_tapex = gr.Text(label="TAPEX answer")
answer_text_tapas = gr.Text(label="TAPAS (WTQ) answer")
answer_text_tapas2 = gr.Text(label="TAPAS (WikiSQL) answer")
description = "This Space lets you ask questions on CSV documents with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). \
Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n\
A sample file with football statistics is available in the repository: \n\n\
* Which team has the most wins? Answer: Manchester City FC\n\
* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n\
* Which teams have scored fewer than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n\
* What is the average number of wins? Answer: 16 (rounded)\n\n\
You can also upload your own CSV file. Please note that the maximum sequence length for both models is 1024 tokens, \
so you may need to limit the number of rows in your CSV file. Chunking is not implemented yet."
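# Example of the JSON expected in the table data field (illustrative, reusing the sample
# dict from process2 above; not part of the original Space):
# {"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], "Number of movies": ["87", "53", "69"]}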
iface = gr.Interface(
    theme="huggingface",
    description=description,
    layout="vertical",
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
    examples=[],
    allow_flagging="never",
)
iface.launch()