| import torch | |
| from transformers import GPT2ForSequenceClassification | |
class ClassificationHead(torch.nn.Module):
    """Classification head that maps encoder features to class logits.

    One of four variants is built, chosen in this priority order:

    1. ``is_deep``   -- MLP ``embed_size -> 128 -> 64 -> class_size``.
    2. ``is_deeper`` -- MLP ``embed_size -> 512 -> 256 -> 128 -> 64 -> class_size``.
    3. ``use_xlnet`` -- a pretrained ``GPT2ForSequenceClassification``
       ("microsoft/DialogRPT-updown") followed by ``Linear(8, class_size)``.
    4. otherwise     -- a single ``Linear(embed_size, class_size)``.

    ReLU is applied after every MLP layer except the last one.

    Args:
        class_size: Number of output logits.
        embed_size: Size of the last dimension of the incoming features
            (unused by the ``use_xlnet`` variant).
        is_deep: Build the three-layer MLP variant.
        use_xlnet: Build the pretrained-backbone variant.
            NOTE(review): despite the flag name, the checkpoint loaded is a
            GPT-2 model, not XLNet.
        is_deeper: Build the five-layer MLP variant (ignored when
            ``is_deep`` is also set).
    """

    def __init__(self, class_size, embed_size, is_deep=False, use_xlnet=False, is_deeper=False):
        super().__init__()
        self.class_size = class_size
        self.embed_size = embed_size
        self.is_deep = is_deep
        self.is_deeper = is_deeper
        self.use_xlnet = use_xlnet

        # Attribute names (mlp1..mlp5, mlp, gpt) are part of the state_dict
        # layout; keep them stable so existing checkpoints still load.
        if is_deep:
            self.mlp1 = torch.nn.Linear(embed_size, 128)
            self.mlp2 = torch.nn.Linear(128, 64)
            self.mlp3 = torch.nn.Linear(64, class_size)
        elif is_deeper:
            self.mlp1 = torch.nn.Linear(embed_size, 512)
            self.mlp2 = torch.nn.Linear(512, 256)
            self.mlp3 = torch.nn.Linear(256, 128)
            self.mlp4 = torch.nn.Linear(128, 64)
            self.mlp5 = torch.nn.Linear(64, class_size)
        elif use_xlnet:
            self.gpt = GPT2ForSequenceClassification.from_pretrained("microsoft/DialogRPT-updown")
            # NOTE(review): the 8 input features must match the width of the
            # backbone's first output -- confirm against the checkpoint config.
            self.mlp = torch.nn.Linear(8, class_size, bias=True)
        else:
            self.mlp = torch.nn.Linear(embed_size, class_size)

    def forward(self, hidden_state, inputs_embeds=None):
        """Compute class logits.

        Args:
            hidden_state: For the MLP and linear variants, the feature tensor
                fed to the first linear layer. For the ``use_xlnet`` variant
                it is forwarded to the backbone as ``input_ids``.
            inputs_embeds: Only used by the ``use_xlnet`` variant, where it is
                forwarded to the backbone unchanged.

        Returns:
            The output of the variant's final linear layer.
        """
        relu = torch.nn.functional.relu

        if self.is_deep:
            features = relu(self.mlp1(hidden_state))
            features = relu(self.mlp2(features))
            return self.mlp3(features)

        if self.is_deeper:
            features = relu(self.mlp1(hidden_state))
            features = relu(self.mlp2(features))
            features = relu(self.mlp3(features))
            features = relu(self.mlp4(features))
            return self.mlp5(features)

        if self.use_xlnet:
            # Request a plain tuple and index it: unpacking the default
            # dict-like model output would yield key names instead of
            # tensors, and the tuple length varies with caching options.
            backbone_out = self.gpt(
                input_ids=hidden_state,
                inputs_embeds=inputs_embeds,
                return_dict=False,
            )
            return self.mlp(backbone_out[0])

        return self.mlp(hidden_state)