Create README.md

This model uses the hfl/chinese-roberta-wwm-ext-large backbone and was trained on the Chinese versions of the SNLI, MNLI, DNLI, and KvPI datasets.

The model structure is as follows:

```python
import torch
import torch.nn as nn
from transformers import AutoModel

pretrain_model_dir = "hfl/chinese-roberta-wwm-ext-large"
bert_hidden_dim = 1024   # hidden size of the -large backbone
num_labels = 3           # assumed label count (e.g. entailment / neutral / contradiction); adjust to the checkpoint


class RobertaForSequenceClassification(nn.Module):
    def __init__(self, tagset_size):
        super(RobertaForSequenceClassification, self).__init__()
        self.tagset_size = tagset_size

        self.roberta_single = AutoModel.from_pretrained(pretrain_model_dir)
        self.single_hidden2tag = RobertaClassificationHead(bert_hidden_dim, tagset_size)

    def forward(self, input_ids, input_mask):
        outputs_single = self.roberta_single(input_ids, input_mask, None)
        hidden_states_single = outputs_single[1]  # pooled output, shape (batch, hidden)

        score_single = self.single_hidden2tag(hidden_states_single)  # (batch, tag_set)
        return score_single


class RobertaClassificationHead(nn.Module):
    def __init__(self, bert_hidden_dim, num_labels):
        super(RobertaClassificationHead, self).__init__()
        self.dense = nn.Linear(bert_hidden_dim, bert_hidden_dim)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(bert_hidden_dim, num_labels)

    def forward(self, features):
        x = features  # pooled <s> token representation (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


model = RobertaForSequenceClassification(num_labels)
```
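
For reference, the sketch below shows one possible way to run the classifier on a Chinese sentence pair. It is only an illustration: the checkpoint file name `pytorch_model.bin`, the assumption that the fine-tuned weights were saved as a plain `state_dict` of this wrapper module, and the label order are all assumptions not specified by this card.

```python
from transformers import AutoTokenizer

# Hypothetical usage sketch; adjust paths and label mapping to the released checkpoint.
tokenizer = AutoTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext-large")

premise = "他今天在北京开会。"      # "He is attending a meeting in Beijing today."
hypothesis = "他今天不在北京。"    # "He is not in Beijing today."

enc = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=128)

# Assumes the fine-tuned weights were saved as a state_dict of RobertaForSequenceClassification.
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()

with torch.no_grad():
    logits = model(enc["input_ids"], enc["attention_mask"])  # (1, num_labels)

pred = logits.argmax(dim=-1).item()
print(pred)  # index into the (assumed) label set
```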