Create README.md

This model uses the hfl/chinese-roberta-wwm-ext-large backbone and was trained on the Chinese versions of the SNLI, MNLI, DNLI, and KvPI datasets.

The model structure is as follows:

```python
import torch
import torch.nn as nn
from transformers import AutoModel

pretrain_model_dir = "hfl/chinese-roberta-wwm-ext-large"
bert_hidden_dim = 1024   # hidden size of the -large backbone
num_labels = 3           # assumed label count (e.g. entailment / neutral / contradiction); adjust to the checkpoint


class RobertaForSequenceClassification(nn.Module):
    def __init__(self, tagset_size):
        super(RobertaForSequenceClassification, self).__init__()
        self.tagset_size = tagset_size

        self.roberta_single = AutoModel.from_pretrained(pretrain_model_dir)
        self.single_hidden2tag = RobertaClassificationHead(bert_hidden_dim, tagset_size)

    def forward(self, input_ids, input_mask):
        outputs_single = self.roberta_single(input_ids, input_mask, None)
        hidden_states_single = outputs_single[1]  # pooled output, shape (batch, hidden)

        score_single = self.single_hidden2tag(hidden_states_single)  # (batch, tag_set)
        return score_single


class RobertaClassificationHead(nn.Module):
    def __init__(self, bert_hidden_dim, num_labels):
        super(RobertaClassificationHead, self).__init__()
        self.dense = nn.Linear(bert_hidden_dim, bert_hidden_dim)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(bert_hidden_dim, num_labels)

    def forward(self, features):
        x = features  # pooled <s> token representation (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


model = RobertaForSequenceClassification(num_labels)
```
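
For reference, the sketch below shows one possible way to run the classifier on a Chinese sentence pair. It is only an illustration: the checkpoint file name `pytorch_model.bin`, the assumption that the fine-tuned weights were saved as a plain `state_dict` of this wrapper module, and the label order are all assumptions not specified by this card.

```python
from transformers import AutoTokenizer

# Hypothetical usage sketch; adjust paths and label mapping to the released checkpoint.
tokenizer = AutoTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext-large")

premise = "他今天在北京开会。"      # "He is attending a meeting in Beijing today."
hypothesis = "他今天不在北京。"    # "He is not in Beijing today."

enc = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=128)

# Assumes the fine-tuned weights were saved as a state_dict of RobertaForSequenceClassification.
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()

with torch.no_grad():
    logits = model(enc["input_ids"], enc["attention_mask"])  # (1, num_labels)

pred = logits.argmax(dim=-1).item()
print(pred)  # index into the (assumed) label set
```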