import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel

class CategoryHead(nn.Module):
    """Maps the 256-d pooled sentence embedding to logits over 5 categories."""

    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(256, 64)
        self.lin2 = nn.Linear(64, 5)

    def forward(self, x):
        x = torch.relu(self.lin1(x))
        return self.lin2(x)

class SentimentHead(nn.Module):
    """Produces a single real-valued sentiment score for the ordinal link."""

    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(256, 64)
        # No bias: a constant shift in the score would be redundant with the
        # learned cutpoints of the cumulative-link layer below.
        self.lin2 = nn.Linear(64, 1, bias=False)

    def forward(self, x):
        x = torch.relu(self.lin1(x))
        return self.lin2(x)

def mean_pooling(model_output, attention_mask):
    """Masked mean over the token dimension: padded positions contribute
    nothing, and the sum is divided by the count of real tokens."""
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(model_output.size()).float()
    sum_embeddings = torch.sum(model_output * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)  # avoid division by zero
    return sum_embeddings / sum_mask
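
# Behaviour sketch (doctest-style, with illustrative values): the padded tail
# of the second sequence is excluded from the average.
# >>> emb = torch.arange(8.).view(2, 4, 1)              # token "embeddings"
# >>> mask = torch.tensor([[1, 1, 1, 1], [1, 1, 0, 0]]) # second row is padded
# >>> mean_pooling(emb, mask).squeeze(-1)
# tensor([1.5000, 4.5000])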

class UnionModel(nn.Module):
    """Frozen BERT encoder with a trainable 768 -> 256 projection feeding two
    task heads: category classification and ordinal sentiment."""

    def __init__(self, model_path):
        super().__init__()
        self.bert_model = AutoModel.from_pretrained(model_path)
        # Freeze the encoder; only the projection and the heads are trained.
        for param in self.bert_model.parameters():
            param.requires_grad = False
        # Replace the default pooler with a per-token linear projection, so
        # pooler_output becomes a [batch, seq_len, 256] tensor.
        self.bert_model.pooler = nn.Linear(in_features=768, out_features=256)
        self.bert_model.to('cpu')
        self.category_head = CategoryHead()
        self.sentiment_head = SentimentHead()

    def forward(self, input):
        output = self.bert_model(**input)
        output = output.pooler_output
        # Collapse the token dimension with the mask-aware mean.
        output = mean_pooling(output, input['attention_mask'])
        return self.category_head(output), self.sentiment_head(output)
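
# Data flow for a batch of B sequences of length T (shapes per the modules above):
#   bert_model(**input).pooler_output -> [B, T, 256]  per-token projection
#   mean_pooling(...)                 -> [B, 256]     sentence embedding
#   category_head(...)                -> [B, 5]       category logits
#   sentiment_head(...)               -> [B, 1]       raw ordinal score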

class LogisticCumulativeLink(nn.Module):
    """
    Converts a single real-valued score into a probability distribution over
    ordered classes via a proportional-odds (cumulative logit) model.

    Parameters
    ----------
    num_classes : int
        Number of ordered classes to partition the odds into.
    init_cutpoints : str (default='ordered')
        How to initialize the cutpoints of the model. Valid values are
        - ordered : cutpoints are evenly spaced and centred around zero.
        - random : cutpoints are initialized to sorted random values.
    """

    def __init__(self, num_classes: int,
                 init_cutpoints: str = 'ordered') -> None:
        assert num_classes > 2, (
            'Only use this model if you have 3 or more classes'
        )
        super().__init__()
        self.num_classes = num_classes
        self.init_cutpoints = init_cutpoints
        if init_cutpoints == 'ordered':
            num_cutpoints = self.num_classes - 1
            cutpoints = torch.arange(num_cutpoints).float() - num_cutpoints / 2
            self.cutpoints = nn.Parameter(cutpoints)
        elif init_cutpoints == 'random':
            cutpoints = torch.rand(self.num_classes - 1).sort()[0]
            self.cutpoints = nn.Parameter(cutpoints)
        else:
            raise ValueError(f'{init_cutpoints} is not a valid init_cutpoints type')

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Equation (11) from
        "On the consistency of ordinal regression methods", Pedregosa et al.

        X is a [batch, 1] tensor of scores; the result is a
        [batch, num_classes] tensor of class probabilities.
        """
        # sigmoid(cutpoint_k - x) is the cumulative probability P(y <= k);
        # class probabilities are differences of consecutive cumulatives,
        # with the first and last classes taking the tail mass.
        sigmoids = torch.sigmoid(self.cutpoints - X)
        link_mat = sigmoids[:, 1:] - sigmoids[:, :-1]
        link_mat = torch.cat((
            sigmoids[:, [0]],
            link_mat,
            (1 - sigmoids[:, [-1]])
        ), dim=1)
        return link_mat
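
# Sanity-check sketch: each output row is a valid distribution over the
# ordered classes (with the default 'ordered' init, cutpoints start at [-1., 0.]).
# >>> link = LogisticCumulativeLink(3)
# >>> probs = link(torch.zeros(2, 1))
# >>> probs.sum(dim=1)
# tensor([1., 1.], grad_fn=...)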

class CustomOrdinalLogisticModel(nn.Module):
    """Wraps a predictor returning (category logits, sentiment score) and maps
    the score to ordinal class probabilities through the cumulative link."""

    def __init__(self, predictor: nn.Module, num_classes: int,
                 init_cutpoints: str = 'ordered') -> None:
        super().__init__()
        self.num_classes = num_classes
        self.predictor = predictor
        self.link = LogisticCumulativeLink(self.num_classes,
                                           init_cutpoints=init_cutpoints)

    def forward(self, *args, **kwargs):
        cat, sent = self.predictor(*args, **kwargs)
        return cat, self.link(sent)

tokenizer = AutoTokenizer.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment')
model = CustomOrdinalLogisticModel(UnionModel('blanchefort/rubert-base-cased-sentiment-rusentiment'), 3).to('cpu')
# strict=False ignores any mismatch between checkpoint keys and model keys.
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'), strict=False)

def inference(input_data):
    tokenized = tokenizer(input_data['sentence'])
    input_ids = torch.LongTensor(tokenized['input_ids']).unsqueeze(0).to('cpu')
    attention_mask = torch.IntTensor(tokenized['attention_mask']).unsqueeze(0).to('cpu')
    model.eval()
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        answer = model({'input_ids': input_ids, 'attention_mask': attention_mask})
    # The category head returns raw logits; squash them to [0, 1] scores.
    answer[0][0] = torch.sigmoid(answer[0][0])
    return dict(answer=answer)
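
# Minimal usage sketch (assumes best_model.pth is available alongside this
# file; the sentence is an illustrative placeholder for the Russian model):
if __name__ == '__main__':
    sample = {'sentence': 'Отличный сервис, всем доволен!'}  # "Great service, fully satisfied!"
    print(inference(sample))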