Amein-Eskinder
committed on
Commit
•
e542267
1
Parent(s):
b3d0035
Updated model files
Browse files- .gitattributes +2 -9
- README.md +48 -0
- config.json +111 -0
- eval.log +234 -0
- model.safetensors +3 -0
- pytorch_model.bin +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +23 -0
- trainer_state.json +232 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
@@ -2,34 +2,27 @@
|
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
5 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
6 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
8 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
9 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
10 |
*.model filter=lfs diff=lfs merge=lfs -text
|
11 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
12 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
13 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
14 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
16 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
17 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
18 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
|
19 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
20 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
21 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
22 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
23 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
24 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
|
3 |
+
tags:
|
4 |
+
- token-classification
|
5 |
+
datasets:
|
6 |
+
- djagatiya/ner-ontonotes-v5-eng-v4
|
7 |
+
widget:
|
8 |
+
- text: "On September 1st George won 1 dollar while watching Game of Thrones."
|
9 |
+
---
|
10 |
+
|
11 |
+
# (NER) ALBERT-base-v2 : conll2012_ontonotesv5-english-v4
|
12 |
+
|
13 |
+
This `ALBERT-base-v2` NER model was fine-tuned on the `english-v4` version of the `conll2012_ontonotesv5` dataset. <br>
|
14 |
+
Check out [NER-System Repository](https://github.com/djagatiya/NER-System) for more information.
|
15 |
+
|
16 |
+
## Evaluation
|
17 |
+
- Precision: 86.20
|
18 |
+
- Recall: 86.18
|
19 |
+
- F1-Score: 86.19
|
20 |
+
|
21 |
+
> Check out the [eval.log](eval.log) file for the evaluation metrics and the classification report.
|
22 |
+
|
23 |
+
```
|
24 |
+
precision recall f1-score support
|
25 |
+
|
26 |
+
CARDINAL 0.84 0.83 0.83 935
|
27 |
+
DATE 0.84 0.87 0.86 1602
|
28 |
+
EVENT 0.61 0.52 0.56 63
|
29 |
+
FAC 0.54 0.59 0.56 135
|
30 |
+
GPE 0.95 0.94 0.95 2240
|
31 |
+
LANGUAGE 0.85 0.50 0.63 22
|
32 |
+
LAW 0.56 0.57 0.57 40
|
33 |
+
LOC 0.61 0.65 0.63 179
|
34 |
+
MONEY 0.85 0.88 0.86 314
|
35 |
+
NORP 0.88 0.92 0.90 841
|
36 |
+
ORDINAL 0.78 0.86 0.81 195
|
37 |
+
ORG 0.84 0.81 0.82 1795
|
38 |
+
PERCENT 0.88 0.87 0.88 349
|
39 |
+
PERSON 0.94 0.92 0.93 1988
|
40 |
+
PRODUCT 0.57 0.53 0.55 76
|
41 |
+
QUANTITY 0.77 0.81 0.79 105
|
42 |
+
TIME 0.59 0.66 0.62 212
|
43 |
+
WORK_OF_ART 0.60 0.52 0.56 166
|
44 |
+
|
45 |
+
micro avg 0.86 0.86 0.86 11257
|
46 |
+
macro avg 0.75 0.74 0.74 11257
|
47 |
+
weighted avg 0.86 0.86 0.86 11257
|
48 |
+
```
|
config.json
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "albert-base-v2",
|
3 |
+
"architectures": [
|
4 |
+
"AlbertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0,
|
7 |
+
"bos_token_id": 2,
|
8 |
+
"classifier_dropout_prob": 0.1,
|
9 |
+
"down_scale_factor": 1,
|
10 |
+
"embedding_size": 128,
|
11 |
+
"eos_token_id": 3,
|
12 |
+
"gap_size": 0,
|
13 |
+
"hidden_act": "gelu_new",
|
14 |
+
"hidden_dropout_prob": 0,
|
15 |
+
"hidden_size": 768,
|
16 |
+
"id2label": {
|
17 |
+
"0": "O",
|
18 |
+
"1": "B-PERSON",
|
19 |
+
"2": "I-PERSON",
|
20 |
+
"3": "B-NORP",
|
21 |
+
"4": "I-NORP",
|
22 |
+
"5": "B-FAC",
|
23 |
+
"6": "I-FAC",
|
24 |
+
"7": "B-ORG",
|
25 |
+
"8": "I-ORG",
|
26 |
+
"9": "B-GPE",
|
27 |
+
"10": "I-GPE",
|
28 |
+
"11": "B-LOC",
|
29 |
+
"12": "I-LOC",
|
30 |
+
"13": "B-PRODUCT",
|
31 |
+
"14": "I-PRODUCT",
|
32 |
+
"15": "B-DATE",
|
33 |
+
"16": "I-DATE",
|
34 |
+
"17": "B-TIME",
|
35 |
+
"18": "I-TIME",
|
36 |
+
"19": "B-PERCENT",
|
37 |
+
"20": "I-PERCENT",
|
38 |
+
"21": "B-MONEY",
|
39 |
+
"22": "I-MONEY",
|
40 |
+
"23": "B-QUANTITY",
|
41 |
+
"24": "I-QUANTITY",
|
42 |
+
"25": "B-ORDINAL",
|
43 |
+
"26": "I-ORDINAL",
|
44 |
+
"27": "B-CARDINAL",
|
45 |
+
"28": "I-CARDINAL",
|
46 |
+
"29": "B-EVENT",
|
47 |
+
"30": "I-EVENT",
|
48 |
+
"31": "B-WORK_OF_ART",
|
49 |
+
"32": "I-WORK_OF_ART",
|
50 |
+
"33": "B-LAW",
|
51 |
+
"34": "I-LAW",
|
52 |
+
"35": "B-LANGUAGE",
|
53 |
+
"36": "I-LANGUAGE"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"inner_group_num": 1,
|
57 |
+
"intermediate_size": 3072,
|
58 |
+
"label2id": {
|
59 |
+
"B-CARDINAL": 27,
|
60 |
+
"B-DATE": 15,
|
61 |
+
"B-EVENT": 29,
|
62 |
+
"B-FAC": 5,
|
63 |
+
"B-GPE": 9,
|
64 |
+
"B-LANGUAGE": 35,
|
65 |
+
"B-LAW": 33,
|
66 |
+
"B-LOC": 11,
|
67 |
+
"B-MONEY": 21,
|
68 |
+
"B-NORP": 3,
|
69 |
+
"B-ORDINAL": 25,
|
70 |
+
"B-ORG": 7,
|
71 |
+
"B-PERCENT": 19,
|
72 |
+
"B-PERSON": 1,
|
73 |
+
"B-PRODUCT": 13,
|
74 |
+
"B-QUANTITY": 23,
|
75 |
+
"B-TIME": 17,
|
76 |
+
"B-WORK_OF_ART": 31,
|
77 |
+
"I-CARDINAL": 28,
|
78 |
+
"I-DATE": 16,
|
79 |
+
"I-EVENT": 30,
|
80 |
+
"I-FAC": 6,
|
81 |
+
"I-GPE": 10,
|
82 |
+
"I-LANGUAGE": 36,
|
83 |
+
"I-LAW": 34,
|
84 |
+
"I-LOC": 12,
|
85 |
+
"I-MONEY": 22,
|
86 |
+
"I-NORP": 4,
|
87 |
+
"I-ORDINAL": 26,
|
88 |
+
"I-ORG": 8,
|
89 |
+
"I-PERCENT": 20,
|
90 |
+
"I-PERSON": 2,
|
91 |
+
"I-PRODUCT": 14,
|
92 |
+
"I-QUANTITY": 24,
|
93 |
+
"I-TIME": 18,
|
94 |
+
"I-WORK_OF_ART": 32,
|
95 |
+
"O": 0
|
96 |
+
},
|
97 |
+
"layer_norm_eps": 1e-12,
|
98 |
+
"max_position_embeddings": 512,
|
99 |
+
"model_type": "albert",
|
100 |
+
"net_structure_type": 0,
|
101 |
+
"num_attention_heads": 12,
|
102 |
+
"num_hidden_groups": 1,
|
103 |
+
"num_hidden_layers": 12,
|
104 |
+
"num_memory_blocks": 0,
|
105 |
+
"pad_token_id": 0,
|
106 |
+
"position_embedding_type": "absolute",
|
107 |
+
"torch_dtype": "float32",
|
108 |
+
"transformers_version": "4.20.0",
|
109 |
+
"type_vocab_size": 2,
|
110 |
+
"vocab_size": 30000
|
111 |
+
}
|
eval.log
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2022-07-03 15:51:20,416 - __main__ - INFO - Label List:['O', 'B-PERSON', 'I-PERSON', 'B-NORP', 'I-NORP', 'B-FAC', 'I-FAC', 'B-ORG', 'I-ORG', 'B-GPE', 'I-GPE', 'B-LOC', 'I-LOC', 'B-PRODUCT', 'I-PRODUCT', 'B-DATE', 'I-DATE', 'B-TIME', 'I-TIME', 'B-PERCENT', 'I-PERCENT', 'B-MONEY', 'I-MONEY', 'B-QUANTITY', 'I-QUANTITY', 'B-ORDINAL', 'I-ORDINAL', 'B-CARDINAL', 'I-CARDINAL', 'B-EVENT', 'I-EVENT', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'B-LAW', 'I-LAW', 'B-LANGUAGE', 'I-LANGUAGE']
|
2 |
+
2022-07-03 15:51:26,630 - __main__ - INFO - Dataset({
|
3 |
+
features: ['id', 'words', 'ner_tags'],
|
4 |
+
num_rows: 75187
|
5 |
+
})
|
6 |
+
2022-07-03 15:51:27,367 - __main__ - INFO - Dataset({
|
7 |
+
features: ['id', 'words', 'ner_tags'],
|
8 |
+
num_rows: 9479
|
9 |
+
})
|
10 |
+
2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/spiece.model. We won't load it.
|
11 |
+
2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/added_tokens.json. We won't load it.
|
12 |
+
2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
|
13 |
+
2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer.json
|
14 |
+
2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
|
15 |
+
2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/special_tokens_map.json
|
16 |
+
2022-07-03 15:51:27,372 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer_config.json
|
17 |
+
2022-07-03 15:51:27,422 - __main__ - INFO - {'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1]]}
|
18 |
+
2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁what', '▁kind', '▁of', '▁memory', '▁', '?', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
|
19 |
+
2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
|
20 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁', 'ww', '▁ii', '▁landmarks', '▁on', '▁the', '▁great', '▁earth', '▁of', '▁china', '▁', ':', '▁eternal', '▁memories', '▁of', '▁tai', 'hang', '▁mountain', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
|
21 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁standing', '▁tall', '▁on', '▁tai', 'hang', '▁mountain', '▁is', '▁the', '▁monument', '▁to', '▁the', '▁hundred', '▁regiments', '▁offensive', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
|
22 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁it', '▁is', '▁composed', '▁of', '▁a', '▁primary', '▁', 's', 'tele', '▁', ',', '▁secondary', '▁ste', 'les', '▁', ',', '▁a', '▁huge', '▁round', '▁sculpture', '▁and', '▁beacon', '▁tower', '▁', ',', '▁and', '▁the', '▁great', '▁wall', '▁', ',', '▁among', '▁other', '▁things', '▁', '.', '[SEP]']
|
23 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - -------------
|
24 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
|
25 |
+
2022-07-03 15:51:27,423 - __main__ - INFO - [None, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
|
26 |
+
2022-07-03 15:51:27,427 - datasets.fingerprint - WARNING - Parameter 'function'=<function tokenize_and_align_labels at 0x7f8c9a20af70> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
27 |
+
2022-07-03 15:51:32,943 - __main__ - INFO - {'id': [0, 1, 2, 3, 4], 'words': [['What', 'kind', 'of', 'memory', '?'], ['We', 'respectfully', 'invite', 'you', 'to', 'watch', 'a', 'special', 'edition', 'of', 'Across', 'China', '.'], ['WW', 'II', 'Landmarks', 'on', 'the', 'Great', 'Earth', 'of', 'China', ':', 'Eternal', 'Memories', 'of', 'Taihang', 'Mountain'], ['Standing', 'tall', 'on', 'Taihang', 'Mountain', 'is', 'the', 'Monument', 'to', 'the', 'Hundred', 'Regiments', 'Offensive', '.'], ['It', 'is', 'composed', 'of', 'a', 'primary', 'stele', ',', 'secondary', 'steles', ',', 'a', 'huge', 'round', 'sculpture', 'and', 'beacon', 'tower', ',', 'and', 'the', 'Great', 'Wall', ',', 'among', 'other', 'things', '.']], 'ner_tags': [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0], [31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], [0, 0, 0, 11, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 32, 0, 0, 0, 0, 0]], 'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 0, 0, 0, 0, 0, -100, -100], [-100, 0, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, -100, -100], [-100, 31, -100, 32, 32, 32, 32, 32, 32, 32, 32, 32, -100, 32, 32, 32, 32, -100, 32, -100], [-100, 0, 0, 0, 11, -100, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0, -100, -100], [-100, 0, 0, 0, 0, 0, 0, 0, -100, -100, 0, -100, 0, 0, -100, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, -100, 0, 31, 32, 32, 0, -100, 0, 0, 0, 0, -100, -100]]}
|
28 |
+
2022-07-03 15:51:35,822 - transformers.configuration_utils - INFO - loading configuration file models/albert-base-v2_1656839871.089586/checkpoint-14100/config.json
|
29 |
+
2022-07-03 15:51:35,828 - transformers.configuration_utils - INFO - Model config AlbertConfig {
|
30 |
+
"_name_or_path": "models/albert-base-v2_1656839871.089586/checkpoint-14100",
|
31 |
+
"architectures": [
|
32 |
+
"AlbertForTokenClassification"
|
33 |
+
],
|
34 |
+
"attention_probs_dropout_prob": 0,
|
35 |
+
"bos_token_id": 2,
|
36 |
+
"classifier_dropout_prob": 0.1,
|
37 |
+
"down_scale_factor": 1,
|
38 |
+
"embedding_size": 128,
|
39 |
+
"eos_token_id": 3,
|
40 |
+
"gap_size": 0,
|
41 |
+
"hidden_act": "gelu_new",
|
42 |
+
"hidden_dropout_prob": 0,
|
43 |
+
"hidden_size": 768,
|
44 |
+
"id2label": {
|
45 |
+
"0": "O",
|
46 |
+
"1": "B-PERSON",
|
47 |
+
"2": "I-PERSON",
|
48 |
+
"3": "B-NORP",
|
49 |
+
"4": "I-NORP",
|
50 |
+
"5": "B-FAC",
|
51 |
+
"6": "I-FAC",
|
52 |
+
"7": "B-ORG",
|
53 |
+
"8": "I-ORG",
|
54 |
+
"9": "B-GPE",
|
55 |
+
"10": "I-GPE",
|
56 |
+
"11": "B-LOC",
|
57 |
+
"12": "I-LOC",
|
58 |
+
"13": "B-PRODUCT",
|
59 |
+
"14": "I-PRODUCT",
|
60 |
+
"15": "B-DATE",
|
61 |
+
"16": "I-DATE",
|
62 |
+
"17": "B-TIME",
|
63 |
+
"18": "I-TIME",
|
64 |
+
"19": "B-PERCENT",
|
65 |
+
"20": "I-PERCENT",
|
66 |
+
"21": "B-MONEY",
|
67 |
+
"22": "I-MONEY",
|
68 |
+
"23": "B-QUANTITY",
|
69 |
+
"24": "I-QUANTITY",
|
70 |
+
"25": "B-ORDINAL",
|
71 |
+
"26": "I-ORDINAL",
|
72 |
+
"27": "B-CARDINAL",
|
73 |
+
"28": "I-CARDINAL",
|
74 |
+
"29": "B-EVENT",
|
75 |
+
"30": "I-EVENT",
|
76 |
+
"31": "B-WORK_OF_ART",
|
77 |
+
"32": "I-WORK_OF_ART",
|
78 |
+
"33": "B-LAW",
|
79 |
+
"34": "I-LAW",
|
80 |
+
"35": "B-LANGUAGE",
|
81 |
+
"36": "I-LANGUAGE"
|
82 |
+
},
|
83 |
+
"initializer_range": 0.02,
|
84 |
+
"inner_group_num": 1,
|
85 |
+
"intermediate_size": 3072,
|
86 |
+
"label2id": {
|
87 |
+
"B-CARDINAL": 27,
|
88 |
+
"B-DATE": 15,
|
89 |
+
"B-EVENT": 29,
|
90 |
+
"B-FAC": 5,
|
91 |
+
"B-GPE": 9,
|
92 |
+
"B-LANGUAGE": 35,
|
93 |
+
"B-LAW": 33,
|
94 |
+
"B-LOC": 11,
|
95 |
+
"B-MONEY": 21,
|
96 |
+
"B-NORP": 3,
|
97 |
+
"B-ORDINAL": 25,
|
98 |
+
"B-ORG": 7,
|
99 |
+
"B-PERCENT": 19,
|
100 |
+
"B-PERSON": 1,
|
101 |
+
"B-PRODUCT": 13,
|
102 |
+
"B-QUANTITY": 23,
|
103 |
+
"B-TIME": 17,
|
104 |
+
"B-WORK_OF_ART": 31,
|
105 |
+
"I-CARDINAL": 28,
|
106 |
+
"I-DATE": 16,
|
107 |
+
"I-EVENT": 30,
|
108 |
+
"I-FAC": 6,
|
109 |
+
"I-GPE": 10,
|
110 |
+
"I-LANGUAGE": 36,
|
111 |
+
"I-LAW": 34,
|
112 |
+
"I-LOC": 12,
|
113 |
+
"I-MONEY": 22,
|
114 |
+
"I-NORP": 4,
|
115 |
+
"I-ORDINAL": 26,
|
116 |
+
"I-ORG": 8,
|
117 |
+
"I-PERCENT": 20,
|
118 |
+
"I-PERSON": 2,
|
119 |
+
"I-PRODUCT": 14,
|
120 |
+
"I-QUANTITY": 24,
|
121 |
+
"I-TIME": 18,
|
122 |
+
"I-WORK_OF_ART": 32,
|
123 |
+
"O": 0
|
124 |
+
},
|
125 |
+
"layer_norm_eps": 1e-12,
|
126 |
+
"max_position_embeddings": 512,
|
127 |
+
"model_type": "albert",
|
128 |
+
"net_structure_type": 0,
|
129 |
+
"num_attention_heads": 12,
|
130 |
+
"num_hidden_groups": 1,
|
131 |
+
"num_hidden_layers": 12,
|
132 |
+
"num_memory_blocks": 0,
|
133 |
+
"pad_token_id": 0,
|
134 |
+
"position_embedding_type": "absolute",
|
135 |
+
"torch_dtype": "float32",
|
136 |
+
"transformers_version": "4.20.0",
|
137 |
+
"type_vocab_size": 2,
|
138 |
+
"vocab_size": 30000
|
139 |
+
}
|
140 |
+
|
141 |
+
2022-07-03 15:51:35,912 - transformers.modeling_utils - INFO - loading weights file models/albert-base-v2_1656839871.089586/checkpoint-14100/pytorch_model.bin
|
142 |
+
2022-07-03 15:51:36,021 - transformers.modeling_utils - INFO - All model checkpoint weights were used when initializing AlbertForTokenClassification.
|
143 |
+
|
144 |
+
2022-07-03 15:51:36,022 - transformers.modeling_utils - INFO - All the weights of AlbertForTokenClassification were initialized from the model checkpoint at models/albert-base-v2_1656839871.089586/checkpoint-14100.
|
145 |
+
If your task is similar to the task the model of the checkpoint was trained on, you can already use AlbertForTokenClassification for predictions without further training.
|
146 |
+
2022-07-03 15:51:36,022 - __main__ - INFO - AlbertForTokenClassification(
|
147 |
+
(albert): AlbertModel(
|
148 |
+
(embeddings): AlbertEmbeddings(
|
149 |
+
(word_embeddings): Embedding(30000, 128, padding_idx=0)
|
150 |
+
(position_embeddings): Embedding(512, 128)
|
151 |
+
(token_type_embeddings): Embedding(2, 128)
|
152 |
+
(LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
|
153 |
+
(dropout): Dropout(p=0, inplace=False)
|
154 |
+
)
|
155 |
+
(encoder): AlbertTransformer(
|
156 |
+
(embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
|
157 |
+
(albert_layer_groups): ModuleList(
|
158 |
+
(0): AlbertLayerGroup(
|
159 |
+
(albert_layers): ModuleList(
|
160 |
+
(0): AlbertLayer(
|
161 |
+
(full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
162 |
+
(attention): AlbertAttention(
|
163 |
+
(query): Linear(in_features=768, out_features=768, bias=True)
|
164 |
+
(key): Linear(in_features=768, out_features=768, bias=True)
|
165 |
+
(value): Linear(in_features=768, out_features=768, bias=True)
|
166 |
+
(attention_dropout): Dropout(p=0, inplace=False)
|
167 |
+
(output_dropout): Dropout(p=0, inplace=False)
|
168 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
169 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
170 |
+
)
|
171 |
+
(ffn): Linear(in_features=768, out_features=3072, bias=True)
|
172 |
+
(ffn_output): Linear(in_features=3072, out_features=768, bias=True)
|
173 |
+
(activation): NewGELUActivation()
|
174 |
+
(dropout): Dropout(p=0, inplace=False)
|
175 |
+
)
|
176 |
+
)
|
177 |
+
)
|
178 |
+
)
|
179 |
+
)
|
180 |
+
)
|
181 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
182 |
+
(classifier): Linear(in_features=768, out_features=37, bias=True)
|
183 |
+
)
|
184 |
+
2022-07-03 15:51:36,022 - __main__ - INFO - CONFIGS:{
|
185 |
+
"output_dir": "./models/finetuned-base-uncased_1656843680.4141676",
|
186 |
+
"per_device_train_batch_size": 16,
|
187 |
+
"per_device_eval_batch_size": 16,
|
188 |
+
"save_total_limit": 2,
|
189 |
+
"num_train_epochs": 3,
|
190 |
+
"seed": 1,
|
191 |
+
"load_best_model_at_end": true,
|
192 |
+
"evaluation_strategy": "epoch",
|
193 |
+
"save_strategy": "epoch",
|
194 |
+
"learning_rate": 2e-05,
|
195 |
+
"weight_decay": 0.01,
|
196 |
+
"logging_steps": 469.0
|
197 |
+
}
|
198 |
+
2022-07-03 15:51:36,023 - transformers.training_args - INFO - PyTorch: setting up devices
|
199 |
+
2022-07-03 15:51:36,070 - transformers.training_args - INFO - The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
|
200 |
+
2022-07-03 15:51:36,075 - __main__ - INFO - [[ MODEL EVALUATION ]]
|
201 |
+
2022-07-03 15:51:36,075 - transformers.trainer - INFO - The following columns in the evaluation set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
|
202 |
+
2022-07-03 15:51:36,077 - transformers.trainer - INFO - ***** Running Evaluation *****
|
203 |
+
2022-07-03 15:51:36,077 - transformers.trainer - INFO - Num examples = 9479
|
204 |
+
2022-07-03 15:51:36,078 - transformers.trainer - INFO - Batch size = 16
|
205 |
+
2022-07-03 16:02:02,467 - __main__ - INFO - {'eval_loss': 0.08666322380304337, 'eval_precision': 0.8620168813860506, 'eval_recall': 0.8618637292351425, 'eval_f1': 0.8619402985074628, 'eval_accuracy': 0.9780515276066022, 'eval_runtime': 626.3804, 'eval_samples_per_second': 15.133, 'eval_steps_per_second': 0.947, 'step': 0}
|
206 |
+
2022-07-03 16:02:02,468 - transformers.trainer - INFO - The following columns in the test set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
|
207 |
+
2022-07-03 16:02:02,471 - transformers.trainer - INFO - ***** Running Prediction *****
|
208 |
+
2022-07-03 16:02:02,471 - transformers.trainer - INFO - Num examples = 9479
|
209 |
+
2022-07-03 16:02:02,471 - transformers.trainer - INFO - Batch size = 16
|
210 |
+
2022-07-03 16:12:35,933 - __main__ - INFO - precision recall f1-score support
|
211 |
+
|
212 |
+
CARDINAL 0.84 0.83 0.83 935
|
213 |
+
DATE 0.84 0.87 0.86 1602
|
214 |
+
EVENT 0.61 0.52 0.56 63
|
215 |
+
FAC 0.54 0.59 0.56 135
|
216 |
+
GPE 0.95 0.94 0.95 2240
|
217 |
+
LANGUAGE 0.85 0.50 0.63 22
|
218 |
+
LAW 0.56 0.57 0.57 40
|
219 |
+
LOC 0.61 0.65 0.63 179
|
220 |
+
MONEY 0.85 0.88 0.86 314
|
221 |
+
NORP 0.88 0.92 0.90 841
|
222 |
+
ORDINAL 0.78 0.86 0.81 195
|
223 |
+
ORG 0.84 0.81 0.82 1795
|
224 |
+
PERCENT 0.88 0.87 0.88 349
|
225 |
+
PERSON 0.94 0.92 0.93 1988
|
226 |
+
PRODUCT 0.57 0.53 0.55 76
|
227 |
+
QUANTITY 0.77 0.81 0.79 105
|
228 |
+
TIME 0.59 0.66 0.62 212
|
229 |
+
WORK_OF_ART 0.60 0.52 0.56 166
|
230 |
+
|
231 |
+
micro avg 0.86 0.86 0.86 11257
|
232 |
+
macro avg 0.75 0.74 0.74 11257
|
233 |
+
weighted avg 0.86 0.86 0.86 11257
|
234 |
+
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4832c46d69c0f42a62c6af8ccfb9b6bb46f2324d23dd612cf69faa839b423eb8
|
3 |
+
size 44493212
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8de2b6a7780b1ca9b38a0a34f6bd5e8003378e955a637b4ce4e3c97910f9f7f8
|
3 |
+
size 44498135
|
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ceb5f98e7b998ad8337ebed923c3ea142c9ee62b5bd132058e0fd660c83dd81
|
3 |
+
size 14503
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0271a42487a5a379bf223d761cfc585dcba1a6069b012d2a634c31973187a24e
|
3 |
+
size 623
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "[MASK]",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "[SEP]",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"do_lower_case": true,
|
5 |
+
"eos_token": "[SEP]",
|
6 |
+
"keep_accents": false,
|
7 |
+
"mask_token": {
|
8 |
+
"__type": "AddedToken",
|
9 |
+
"content": "[MASK]",
|
10 |
+
"lstrip": true,
|
11 |
+
"normalized": false,
|
12 |
+
"rstrip": false,
|
13 |
+
"single_word": false
|
14 |
+
},
|
15 |
+
"model_max_length": 512,
|
16 |
+
"name_or_path": "albert-base-v2",
|
17 |
+
"pad_token": "<pad>",
|
18 |
+
"remove_space": true,
|
19 |
+
"sep_token": "[SEP]",
|
20 |
+
"special_tokens_map_file": null,
|
21 |
+
"tokenizer_class": "AlbertTokenizer",
|
22 |
+
"unk_token": "<unk>"
|
23 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.08603910356760025,
|
3 |
+
"best_model_checkpoint": "./models/albert-base-v2_1656839871.089586/checkpoint-9400",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 14100,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.1,
|
12 |
+
"learning_rate": 1.9334751773049647e-05,
|
13 |
+
"loss": 0.2491,
|
14 |
+
"step": 469
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.2,
|
18 |
+
"learning_rate": 1.866950354609929e-05,
|
19 |
+
"loss": 0.1368,
|
20 |
+
"step": 938
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.3,
|
24 |
+
"learning_rate": 1.8004255319148936e-05,
|
25 |
+
"loss": 0.1139,
|
26 |
+
"step": 1407
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 0.4,
|
30 |
+
"learning_rate": 1.7339007092198585e-05,
|
31 |
+
"loss": 0.1082,
|
32 |
+
"step": 1876
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 0.5,
|
36 |
+
"learning_rate": 1.667375886524823e-05,
|
37 |
+
"loss": 0.0974,
|
38 |
+
"step": 2345
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.6,
|
42 |
+
"learning_rate": 1.6008510638297874e-05,
|
43 |
+
"loss": 0.0892,
|
44 |
+
"step": 2814
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.7,
|
48 |
+
"learning_rate": 1.534326241134752e-05,
|
49 |
+
"loss": 0.0941,
|
50 |
+
"step": 3283
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 0.8,
|
54 |
+
"learning_rate": 1.4678014184397164e-05,
|
55 |
+
"loss": 0.0826,
|
56 |
+
"step": 3752
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 0.9,
|
60 |
+
"learning_rate": 1.401276595744681e-05,
|
61 |
+
"loss": 0.0806,
|
62 |
+
"step": 4221
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 1.0,
|
66 |
+
"learning_rate": 1.3347517730496456e-05,
|
67 |
+
"loss": 0.082,
|
68 |
+
"step": 4690
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 1.0,
|
72 |
+
"eval_accuracy": 0.9746784684424369,
|
73 |
+
"eval_f1": 0.8338050592605697,
|
74 |
+
"eval_loss": 0.0911484956741333,
|
75 |
+
"eval_precision": 0.8302069572875386,
|
76 |
+
"eval_recall": 0.8374344852092032,
|
77 |
+
"eval_runtime": 37.0605,
|
78 |
+
"eval_samples_per_second": 255.771,
|
79 |
+
"eval_steps_per_second": 16.001,
|
80 |
+
"step": 4700
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 1.1,
|
84 |
+
"learning_rate": 1.26822695035461e-05,
|
85 |
+
"loss": 0.0686,
|
86 |
+
"step": 5159
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.2,
|
90 |
+
"learning_rate": 1.2017021276595745e-05,
|
91 |
+
"loss": 0.0645,
|
92 |
+
"step": 5628
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 1.3,
|
96 |
+
"learning_rate": 1.1351773049645392e-05,
|
97 |
+
"loss": 0.0638,
|
98 |
+
"step": 6097
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 1.4,
|
102 |
+
"learning_rate": 1.0686524822695037e-05,
|
103 |
+
"loss": 0.0606,
|
104 |
+
"step": 6566
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 1.5,
|
108 |
+
"learning_rate": 1.0021276595744682e-05,
|
109 |
+
"loss": 0.0616,
|
110 |
+
"step": 7035
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 1.6,
|
114 |
+
"learning_rate": 9.356028368794327e-06,
|
115 |
+
"loss": 0.0579,
|
116 |
+
"step": 7504
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 1.7,
|
120 |
+
"learning_rate": 8.690780141843971e-06,
|
121 |
+
"loss": 0.0561,
|
122 |
+
"step": 7973
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 1.8,
|
126 |
+
"learning_rate": 8.025531914893618e-06,
|
127 |
+
"loss": 0.0565,
|
128 |
+
"step": 8442
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.9,
|
132 |
+
"learning_rate": 7.360283687943263e-06,
|
133 |
+
"loss": 0.0551,
|
134 |
+
"step": 8911
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 2.0,
|
138 |
+
"learning_rate": 6.695035460992908e-06,
|
139 |
+
"loss": 0.0592,
|
140 |
+
"step": 9380
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 2.0,
|
144 |
+
"eval_accuracy": 0.9760701501954842,
|
145 |
+
"eval_f1": 0.8429752066115702,
|
146 |
+
"eval_loss": 0.08603910356760025,
|
147 |
+
"eval_precision": 0.8525483783047152,
|
148 |
+
"eval_recall": 0.8336146397796926,
|
149 |
+
"eval_runtime": 36.5619,
|
150 |
+
"eval_samples_per_second": 259.259,
|
151 |
+
"eval_steps_per_second": 16.219,
|
152 |
+
"step": 9400
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 2.1,
|
156 |
+
"learning_rate": 6.029787234042554e-06,
|
157 |
+
"loss": 0.0406,
|
158 |
+
"step": 9849
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 2.2,
|
162 |
+
"learning_rate": 5.3645390070921985e-06,
|
163 |
+
"loss": 0.0387,
|
164 |
+
"step": 10318
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 2.3,
|
168 |
+
"learning_rate": 4.699290780141844e-06,
|
169 |
+
"loss": 0.0399,
|
170 |
+
"step": 10787
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 2.39,
|
174 |
+
"learning_rate": 4.03404255319149e-06,
|
175 |
+
"loss": 0.0407,
|
176 |
+
"step": 11256
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 2.49,
|
180 |
+
"learning_rate": 3.368794326241135e-06,
|
181 |
+
"loss": 0.0394,
|
182 |
+
"step": 11725
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"epoch": 2.59,
|
186 |
+
"learning_rate": 2.70354609929078e-06,
|
187 |
+
"loss": 0.0369,
|
188 |
+
"step": 12194
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 2.69,
|
192 |
+
"learning_rate": 2.0382978723404254e-06,
|
193 |
+
"loss": 0.0408,
|
194 |
+
"step": 12663
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 2.79,
|
198 |
+
"learning_rate": 1.373049645390071e-06,
|
199 |
+
"loss": 0.0363,
|
200 |
+
"step": 13132
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"epoch": 2.89,
|
204 |
+
"learning_rate": 7.078014184397164e-07,
|
205 |
+
"loss": 0.0392,
|
206 |
+
"step": 13601
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 2.99,
|
210 |
+
"learning_rate": 4.2553191489361707e-08,
|
211 |
+
"loss": 0.0392,
|
212 |
+
"step": 14070
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 3.0,
|
216 |
+
"eval_accuracy": 0.9780515276066022,
|
217 |
+
"eval_f1": 0.8619402985074628,
|
218 |
+
"eval_loss": 0.08666320890188217,
|
219 |
+
"eval_precision": 0.8620168813860506,
|
220 |
+
"eval_recall": 0.8618637292351425,
|
221 |
+
"eval_runtime": 36.7418,
|
222 |
+
"eval_samples_per_second": 257.989,
|
223 |
+
"eval_steps_per_second": 16.14,
|
224 |
+
"step": 14100
|
225 |
+
}
|
226 |
+
],
|
227 |
+
"max_steps": 14100,
|
228 |
+
"num_train_epochs": 3,
|
229 |
+
"total_flos": 580572089788770.0,
|
230 |
+
"trial_name": null,
|
231 |
+
"trial_params": null
|
232 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15c7912e3146ffbf0b4944c488054f98500ed7228181655063dab2a1d36f7001
|
3 |
+
size 3311
|