Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer | |
| # summary function - test for single gradio function interface | |
def bulk_function(filename):
    """Classify every text of an uploaded CSV/XLSX file with MindMiner.

    The input table is expected to hold an ID column followed by a text
    column (a leading pandas index column named ``"Unnamed: 0"`` is dropped
    first).  Each text is scored with the ``j-hartmann/MindMiner-Binary``
    model and the results are written next to the input file as
    ``<input stem>_MindMiner_Predictions.csv``.

    Args:
        filename: The uploaded file, either as an object exposing the path
            through a ``.name`` attribute or as a plain path string.

    Returns:
        The path of the CSV file that was written, or ``None`` when no file
        was given or its extension is neither ``.csv`` nor ``.xlsx``.

    Raises:
        ValueError: If the table has fewer than two columns.
    """
    import os  # local import keeps this block self-contained

    # Wrapper exposing one item per text, in the shape Trainer.predict consumes.
    class SimpleDataset:
        def __init__(self, tokenized_texts):
            self.tokenized_texts = tokenized_texts

        def __len__(self):
            return len(self.tokenized_texts["input_ids"])

        def __getitem__(self, idx):
            return {k: v[idx] for k, v in self.tokenized_texts.items()}

    if filename is None:
        return None

    # Accept both an upload wrapper (``.name`` holds the path) and a bare path.
    path = getattr(filename, "name", filename)
    print(filename, type(filename))
    print(path)

    # splitext only looks at the final suffix, so dots elsewhere in the path
    # (directories, "my.data.csv") no longer confuse the type check or the
    # output name, and a missing extension no longer raises IndexError.
    stem, extension = os.path.splitext(path)
    extension = extension.lower()

    # Validate the file type BEFORE loading the model, so unsupported uploads
    # return immediately instead of paying for a model load first.
    if extension == ".csv":
        print("entered")
        df_input = pd.read_csv(path, index_col=False)
    elif extension == ".xlsx":
        df_input = pd.read_excel(path, index_col=False)
    else:
        return None

    # Drop a serialized pandas index if the file was saved with one.
    if len(df_input.columns) > 0 and df_input.columns[0] == "Unnamed: 0":
        df_input = df_input.drop("Unnamed: 0", axis=1)

    if df_input.shape[1] < 2:
        raise ValueError(
            "Input file must contain at least two columns: "
            "an ID column followed by a text column."
        )

    # Positional access keeps working even if column labels are duplicated.
    ids = df_input.iloc[:, 0].to_list()
    lines_s = df_input.iloc[:, 1].to_list()

    if lines_s:
        # load tokenizer and model, create trainer
        model_name = "j-hartmann/MindMiner-Binary"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        trainer = Trainer(model=model)

        # The tokenizer needs strings; missing cells become empty texts while
        # the original cell values are kept untouched for the output table.
        model_inputs = ["" if pd.isna(text) else str(text) for text in lines_s]
        tokenized_texts = tokenizer(model_inputs, truncation=True, padding=True)
        predictions = trainer.predict(SimpleDataset(tokenized_texts))

        # float64 so the rounded scores serialize as clean decimals.
        logits = np.asarray(predictions.predictions, dtype=np.float64)

        # Softmax computed once; subtracting the row maximum avoids overflow
        # in exp() without changing the resulting probabilities.
        exponentials = np.exp(logits - logits.max(axis=-1, keepdims=True))
        probabilities = exponentials / exponentials.sum(axis=-1, keepdims=True)
        rounded = np.round(probabilities, 3)

        labels = pd.Series(logits.argmax(-1)).map(model.config.id2label)
        scores_rounded = rounded.max(axis=1).tolist()
        low = rounded[:, 0].tolist()
        high = rounded[:, 1].tolist()
    else:
        # Header-only file: nothing to predict, still emit a well-formed result.
        labels, scores_rounded, low, high = [], [], [], []

    df = pd.DataFrame(
        list(zip(ids, lines_s, labels, scores_rounded, low, high)),
        columns=[
            df_input.columns[0],
            df_input.columns[1],
            "max_label",
            "max_score",
            "low",
            "high",
        ],
    )
    print(df)

    # save results to csv next to the input file
    output_path = stem + "_MindMiner_Predictions" + ".csv"
    df.to_csv(output_path, index=False)

    # return the written file's path for the space output
    return output_path
# Assemble the web UI: a single uploaded file goes in, a single results file
# comes out, and bulk_function does the work in between.
upload_component = gr.inputs.File(
    file_count="single",
    type="file",
    label="Upload file",
    optional=False,
)
download_component = gr.outputs.File(label="Output file")

app = gr.Interface(
    fn=bulk_function,
    inputs=[upload_component],
    outputs=[download_component],
    # examples=[["YOUR_FILENAME.csv"]], # computes, doesn't export df so far
    theme="huggingface",
    title="Apply MindMiner to Your CSV",
    description="Upload csv file with 2 columns (in order): (a) ID column, (b) text column. The script returns a new file that includes both the ID column and text column together with the mind perception predictions using MindMiner.",
    allow_flagging=False,
)

# Start serving the interface with Gradio's debug flag switched on.
app.launch(debug=True)