import datasets
import gradio as gr
import transformers
from transformers import AutoConfig, AutoTokenizer
from transformers import glue_processors as processors

# Project-local modules: the Transkimer model and the black-box character attack.
from test_module.modeling_transkimer import BertForSequenceClassification as TranskimerForSequenceClassification
from blackbox_utils.my_attack import CharacterAttack
# GLUE-style task name -> (first sentence column, optional second sentence column)
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
    "imdb": ("text", None),
}
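# A minimal sketch (hypothetical helper, not called by this app) of how a task's
# column keys would typically feed the tokenizer during preprocessing:
def _example_preprocess(examples, tokenizer, key1, key2, max_length=128):
    # Single-sentence tasks (e.g. sst2, imdb) have key2 = None.
    texts = (examples[key1],) if key2 is None else (examples[key1], examples[key2])
    return tokenizer(*texts, max_length=max_length, truncation=True)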
# Local checkpoint directory for the demo model.
model_path_dict = {
    "transkimer_sst2_not_pad": './not_pad_0.5',
}
# Silence progress/warning logging from datasets and transformers.
datasets.utils.logging.set_verbosity_error()
transformers.utils.logging.set_verbosity_error()

task_name = 'sst2'
model_type = 'transkimer'
# Task metadata: input columns and label set (needed before building the config)
sentence1_key, sentence2_key = task_to_keys[task_name]
processor = processors[task_name]()
label_list = processor.get_labels()  # e.g. ["0", "1"] for sst2
num_labels = len(label_list)
label_to_id = {v: i for i, v in enumerate(label_list)}

# Load pretrained model and tokenizer
model_path_key = f'{model_type}_{task_name}_not_pad'
model_path = model_path_dict[model_path_key]
config = AutoConfig.from_pretrained(model_path, num_labels=num_labels, finetuning_task=task_name)
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)
model = TranskimerForSequenceClassification.from_pretrained(
    model_path,
    from_tf=bool(".ckpt" in model_path),
    config=config,
)
# Character-level black-box attack wrapper around the model (runs on CPU).
padding = False
attack = CharacterAttack(
    f'{model_type}_{task_name}',
    model,
    tokenizer,
    device='cpu',
    max_per=10,
    padding=padding,
    max_length=128,
    label_to_id=label_to_id,
    sentence1_key=sentence1_key,
    sentence2_key=sentence2_key,
)
def greet(text):
    # Wrap the single input as a (sentence1, sentence2) pair; sst2 has no second sentence.
    text_input = [(text, None)]
    outputs, time = attack.get_prob(text_input)
    _, token_remained, _ = attack.output_analysis(outputs)
    return time, token_remained.item()
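# Local sanity check (hypothetical input string):
#   elapsed, tokens_kept = greet("a gripping, beautifully shot film")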
# `greet` takes one text input and returns two numbers (inference time, tokens remaining).
iface = gr.Interface(fn=greet, inputs="text", outputs=["number", "number"])
iface.launch()
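# Note: `iface.launch(share=True)` would additionally create a temporary public
# Gradio link; the default call above serves the demo locally only.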