Jaren committed
Commit c8679a5
1 Parent(s): 165d871

Create README.md

Files changed (1): README.md (+35, -0)
README.md ADDED
This model uses hfl/chinese-roberta-wwm-ext-large as its backbone and was trained on the Chinese versions of the SNLI, MNLI, DNLI, and KvPI datasets.
The model structure is as follows (imports and the constants `pretrain_model_dir`, `bert_hidden_dim`, and `num_labels` are filled in so the snippet is self-contained; 1024 is the hidden size of the -large backbone, and the 3-way label set is illustrative):

```python
import torch
from torch import nn
from transformers import AutoModel

pretrain_model_dir = "hfl/chinese-roberta-wwm-ext-large"
bert_hidden_dim = 1024  # hidden size of the -large backbone
num_labels = 3          # illustrative: entailment / neutral / contradiction


class RobertaForSequenceClassification(nn.Module):
    def __init__(self, tagset_size):
        super(RobertaForSequenceClassification, self).__init__()
        self.tagset_size = tagset_size

        self.roberta_single = AutoModel.from_pretrained(pretrain_model_dir)
        self.single_hidden2tag = RobertaClassificationHead(bert_hidden_dim, tagset_size)

    def forward(self, input_ids, input_mask):
        outputs_single = self.roberta_single(input_ids, input_mask, None)
        hidden_states_single = outputs_single[1]  # pooled output, (batch, hidden)

        score_single = self.single_hidden2tag(hidden_states_single)  # (batch, tagset_size)
        return score_single


class RobertaClassificationHead(nn.Module):
    def __init__(self, bert_hidden_dim, num_labels):
        super(RobertaClassificationHead, self).__init__()
        self.dense = nn.Linear(bert_hidden_dim, bert_hidden_dim)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(bert_hidden_dim, num_labels)

    def forward(self, features):
        x = features  # already pooled, equivalent to taking the <s>/[CLS] token
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


model = RobertaForSequenceClassification(num_labels)
```