Amein-Eskinder committed
Commit e542267
1 Parent(s): b3d0035

Updated model files

.gitattributes CHANGED
@@ -2,34 +2,27 @@
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
  *.ftz filter=lfs diff=lfs merge=lfs -text
  *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
  *.onnx filter=lfs diff=lfs merge=lfs -text
  *.ot filter=lfs diff=lfs merge=lfs -text
  *.parquet filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
  *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
+ ---
+
+ tags:
+ - token-classification
+ datasets:
+ - djagatiya/ner-ontonotes-v5-eng-v4
+ widget:
+ - text: "On September 1st George won 1 dollar while watching Game of Thrones."
+ ---
+
+ # (NER) ALBERT-base-v2 : conll2012_ontonotesv5-english-v4
+
+ This `ALBERT-base-v2` NER model was fine-tuned on the `english-v4` configuration of the `conll2012_ontonotesv5` dataset. <br>
+ Check out the [NER-System Repository](https://github.com/djagatiya/NER-System) for more information.
+
+ ## Evaluation
+ - Precision: 86.20
+ - Recall: 86.18
+ - F1-Score: 86.19
+
+ > Check out the [eval.log](eval.log) file for the evaluation metrics and classification report.
+
+ ```
+               precision    recall  f1-score   support
+
+      CARDINAL       0.84      0.83      0.83       935
+          DATE       0.84      0.87      0.86      1602
+         EVENT       0.61      0.52      0.56        63
+           FAC       0.54      0.59      0.56       135
+           GPE       0.95      0.94      0.95      2240
+      LANGUAGE       0.85      0.50      0.63        22
+           LAW       0.56      0.57      0.57        40
+           LOC       0.61      0.65      0.63       179
+         MONEY       0.85      0.88      0.86       314
+          NORP       0.88      0.92      0.90       841
+       ORDINAL       0.78      0.86      0.81       195
+           ORG       0.84      0.81      0.82      1795
+       PERCENT       0.88      0.87      0.88       349
+        PERSON       0.94      0.92      0.93      1988
+       PRODUCT       0.57      0.53      0.55        76
+      QUANTITY       0.77      0.81      0.79       105
+          TIME       0.59      0.66      0.62       212
+   WORK_OF_ART       0.60      0.52      0.56       166
+
+     micro avg       0.86      0.86      0.86     11257
+     macro avg       0.75      0.74      0.74     11257
+  weighted avg       0.86      0.86      0.86     11257
+ ```
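
The README's widget sentence doubles as a quick smoke test. Below is a minimal usage sketch with the `transformers` pipeline API; the Hub ID is a placeholder, since this commit page does not state the published repository name.

```python
from transformers import pipeline

# Placeholder Hub ID -- substitute the actual repository this commit belongs to.
MODEL_ID = "your-username/albert-base-v2-ner-ontonotes"

# aggregation_strategy="simple" merges B-/I- subword predictions into whole entity spans.
ner = pipeline("token-classification", model=MODEL_ID, aggregation_strategy="simple")

for entity in ner("On September 1st George won 1 dollar while watching Game of Thrones."):
    print(entity["entity_group"], entity["word"], round(entity["score"], 3))
```

Given the metrics above, this sentence should come back with DATE ("September 1st"), PERSON ("George"), MONEY ("1 dollar"), and WORK_OF_ART ("Game of Thrones") spans.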
config.json ADDED
@@ -0,0 +1,111 @@
+ {
+   "_name_or_path": "albert-base-v2",
+   "architectures": [
+     "AlbertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
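
The `id2label` table above is what turns the classifier's 37 logits per token into BIO tags. A minimal sketch of that decoding, assuming the files from this commit live in a local directory (the path below is illustrative):

```python
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

CHECKPOINT = "./checkpoint-14100"  # illustrative local path to this commit's files

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT)

inputs = tokenizer("George watched Game of Thrones.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, seq_len, 37)

# argmax over the label dimension, then map ids -> BIO tags via the config
pred_ids = logits.argmax(dim=-1)[0].tolist()
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
for token, pred in zip(tokens, pred_ids):
    print(token, model.config.id2label[pred])
```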
eval.log ADDED
@@ -0,0 +1,234 @@
+ 2022-07-03 15:51:20,416 - __main__ - INFO - Label List:['O', 'B-PERSON', 'I-PERSON', 'B-NORP', 'I-NORP', 'B-FAC', 'I-FAC', 'B-ORG', 'I-ORG', 'B-GPE', 'I-GPE', 'B-LOC', 'I-LOC', 'B-PRODUCT', 'I-PRODUCT', 'B-DATE', 'I-DATE', 'B-TIME', 'I-TIME', 'B-PERCENT', 'I-PERCENT', 'B-MONEY', 'I-MONEY', 'B-QUANTITY', 'I-QUANTITY', 'B-ORDINAL', 'I-ORDINAL', 'B-CARDINAL', 'I-CARDINAL', 'B-EVENT', 'I-EVENT', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'B-LAW', 'I-LAW', 'B-LANGUAGE', 'I-LANGUAGE']
+ 2022-07-03 15:51:26,630 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 75187
+ })
+ 2022-07-03 15:51:27,367 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 9479
+ })
+ 2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/spiece.model. We won't load it.
+ 2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/added_tokens.json. We won't load it.
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer.json
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/special_tokens_map.json
+ 2022-07-03 15:51:27,372 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer_config.json
+ 2022-07-03 15:51:27,422 - __main__ - INFO - {'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}
+ 2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁what', '▁kind', '▁of', '▁memory', '▁', '?', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁', 'ww', '▁ii', '▁landmarks', '▁on', '▁the', '▁great', '▁earth', '▁of', '▁china', '▁', ':', '▁eternal', '▁memories', '▁of', '▁tai', 'hang', '▁mountain', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁standing', '▁tall', '▁on', '▁tai', 'hang', '▁mountain', '▁is', '▁the', '▁monument', '▁to', '▁the', '▁hundred', '▁regiments', '▁offensive', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁it', '▁is', '▁composed', '▁of', '▁a', '▁primary', '▁', 's', 'tele', '▁', ',', '▁secondary', '▁ste', 'les', '▁', ',', '▁a', '▁huge', '▁round', '▁sculpture', '▁and', '▁beacon', '▁tower', '▁', ',', '▁and', '▁the', '▁great', '▁wall', '▁', ',', '▁among', '▁other', '▁things', '▁', '.', '[SEP]']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - -------------
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - [None, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
+ 2022-07-03 15:51:27,427 - datasets.fingerprint - WARNING - Parameter 'function'=<function tokenize_and_align_labels at 0x7f8c9a20af70> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
+ 2022-07-03 15:51:32,943 - __main__ - INFO - {'id': [0, 1, 2, 3, 4], 'words': [['What', 'kind', 'of', 'memory', '?'], ['We', 'respectfully', 'invite', 'you', 'to', 'watch', 'a', 'special', 'edition', 'of', 'Across', 'China', '.'], ['WW', 'II', 'Landmarks', 'on', 'the', 'Great', 'Earth', 'of', 'China', ':', 'Eternal', 'Memories', 'of', 'Taihang', 'Mountain'], ['Standing', 'tall', 'on', 'Taihang', 'Mountain', 'is', 'the', 'Monument', 'to', 'the', 'Hundred', 'Regiments', 'Offensive', '.'], ['It', 'is', 'composed', 'of', 'a', 'primary', 'stele', ',', 'secondary', 'steles', ',', 'a', 'huge', 'round', 'sculpture', 'and', 'beacon', 'tower', ',', 'and', 'the', 'Great', 'Wall', ',', 'among', 'other', 'things', '.']], 'ner_tags': [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0], [31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], [0, 0, 0, 11, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 32, 0, 0, 0, 0, 0]], 'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 0, 0, 0, 0, 0, -100, -100], [-100, 0, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, -100, -100], [-100, 31, -100, 32, 32, 32, 32, 32, 32, 32, 32, 32, -100, 32, 32, 32, 32, -100, 32, -100], [-100, 0, 0, 0, 11, -100, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0, -100, -100], [-100, 0, 0, 0, 0, 0, 0, 0, -100, -100, 0, -100, 0, 0, -100, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, -100, 0, 31, 32, 32, 0, -100, 0, 0, 0, 0, -100, -100]]}
+ 2022-07-03 15:51:35,822 - transformers.configuration_utils - INFO - loading configuration file models/albert-base-v2_1656839871.089586/checkpoint-14100/config.json
+ 2022-07-03 15:51:35,828 - transformers.configuration_utils - INFO - Model config AlbertConfig {
+   "_name_or_path": "models/albert-base-v2_1656839871.089586/checkpoint-14100",
+   "architectures": [
+     "AlbertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
+
+ 2022-07-03 15:51:35,912 - transformers.modeling_utils - INFO - loading weights file models/albert-base-v2_1656839871.089586/checkpoint-14100/pytorch_model.bin
+ 2022-07-03 15:51:36,021 - transformers.modeling_utils - INFO - All model checkpoint weights were used when initializing AlbertForTokenClassification.
+
+ 2022-07-03 15:51:36,022 - transformers.modeling_utils - INFO - All the weights of AlbertForTokenClassification were initialized from the model checkpoint at models/albert-base-v2_1656839871.089586/checkpoint-14100.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use AlbertForTokenClassification for predictions without further training.
+ 2022-07-03 15:51:36,022 - __main__ - INFO - AlbertForTokenClassification(
+   (albert): AlbertModel(
+     (embeddings): AlbertEmbeddings(
+       (word_embeddings): Embedding(30000, 128, padding_idx=0)
+       (position_embeddings): Embedding(512, 128)
+       (token_type_embeddings): Embedding(2, 128)
+       (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
+       (dropout): Dropout(p=0, inplace=False)
+     )
+     (encoder): AlbertTransformer(
+       (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
+       (albert_layer_groups): ModuleList(
+         (0): AlbertLayerGroup(
+           (albert_layers): ModuleList(
+             (0): AlbertLayer(
+               (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+               (attention): AlbertAttention(
+                 (query): Linear(in_features=768, out_features=768, bias=True)
+                 (key): Linear(in_features=768, out_features=768, bias=True)
+                 (value): Linear(in_features=768, out_features=768, bias=True)
+                 (attention_dropout): Dropout(p=0, inplace=False)
+                 (output_dropout): Dropout(p=0, inplace=False)
+                 (dense): Linear(in_features=768, out_features=768, bias=True)
+                 (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+               )
+               (ffn): Linear(in_features=768, out_features=3072, bias=True)
+               (ffn_output): Linear(in_features=3072, out_features=768, bias=True)
+               (activation): NewGELUActivation()
+               (dropout): Dropout(p=0, inplace=False)
+             )
+           )
+         )
+       )
+     )
+   )
+   (dropout): Dropout(p=0.1, inplace=False)
+   (classifier): Linear(in_features=768, out_features=37, bias=True)
+ )
+ 2022-07-03 15:51:36,022 - __main__ - INFO - CONFIGS:{
+   "output_dir": "./models/finetuned-base-uncased_1656843680.4141676",
+   "per_device_train_batch_size": 16,
+   "per_device_eval_batch_size": 16,
+   "save_total_limit": 2,
+   "num_train_epochs": 3,
+   "seed": 1,
+   "load_best_model_at_end": true,
+   "evaluation_strategy": "epoch",
+   "save_strategy": "epoch",
+   "learning_rate": 2e-05,
+   "weight_decay": 0.01,
+   "logging_steps": 469.0
+ }
+ 2022-07-03 15:51:36,023 - transformers.training_args - INFO - PyTorch: setting up devices
+ 2022-07-03 15:51:36,070 - transformers.training_args - INFO - The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
+ 2022-07-03 15:51:36,075 - __main__ - INFO - [[ MODEL EVALUATION ]]
+ 2022-07-03 15:51:36,075 - transformers.trainer - INFO - The following columns in the evaluation set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
+ 2022-07-03 15:51:36,077 - transformers.trainer - INFO - ***** Running Evaluation *****
+ 2022-07-03 15:51:36,077 - transformers.trainer - INFO - Num examples = 9479
+ 2022-07-03 15:51:36,078 - transformers.trainer - INFO - Batch size = 16
+ 2022-07-03 16:02:02,467 - __main__ - INFO - {'eval_loss': 0.08666322380304337, 'eval_precision': 0.8620168813860506, 'eval_recall': 0.8618637292351425, 'eval_f1': 0.8619402985074628, 'eval_accuracy': 0.9780515276066022, 'eval_runtime': 626.3804, 'eval_samples_per_second': 15.133, 'eval_steps_per_second': 0.947, 'step': 0}
+ 2022-07-03 16:02:02,468 - transformers.trainer - INFO - The following columns in the test set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - ***** Running Prediction *****
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - Num examples = 9479
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - Batch size = 16
+ 2022-07-03 16:12:35,933 - __main__ - INFO -               precision    recall  f1-score   support
+
+      CARDINAL       0.84      0.83      0.83       935
+          DATE       0.84      0.87      0.86      1602
+         EVENT       0.61      0.52      0.56        63
+           FAC       0.54      0.59      0.56       135
+           GPE       0.95      0.94      0.95      2240
+      LANGUAGE       0.85      0.50      0.63        22
+           LAW       0.56      0.57      0.57        40
+           LOC       0.61      0.65      0.63       179
+         MONEY       0.85      0.88      0.86       314
+          NORP       0.88      0.92      0.90       841
+       ORDINAL       0.78      0.86      0.81       195
+           ORG       0.84      0.81      0.82      1795
+       PERCENT       0.88      0.87      0.88       349
+        PERSON       0.94      0.92      0.93      1988
+       PRODUCT       0.57      0.53      0.55        76
+      QUANTITY       0.77      0.81      0.79       105
+          TIME       0.59      0.66      0.62       212
+   WORK_OF_ART       0.60      0.52      0.56       166
+
+     micro avg       0.86      0.86      0.86     11257
+     macro avg       0.75      0.74      0.74     11257
+  weighted avg       0.86      0.86      0.86     11257
+
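
The `labels` arrays in the log above (with `-100` at `[CLS]`/`[SEP]`, padding, and subword continuations) imply the usual label-alignment step for token classification. Here is a reconstruction of the `tokenize_and_align_labels` function named in the log; the body is a sketch of the standard technique, not the author's code:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

def tokenize_and_align_labels(examples):
    """Align word-level NER tags with subword tokens; -100 is ignored by the loss."""
    tokenized = tokenizer(examples["words"], truncation=True, is_split_into_words=True)
    all_labels = []
    for i, word_labels in enumerate(examples["ner_tags"]):
        word_ids = tokenized.word_ids(batch_index=i)
        previous = None
        labels = []
        for word_id in word_ids:
            if word_id is None:
                labels.append(-100)                  # [CLS], [SEP], padding
            elif word_id != previous:
                labels.append(word_labels[word_id])  # first piece of a word
            else:
                labels.append(-100)                  # subword continuation, masked out
            previous = word_id
        all_labels.append(labels)
    tokenized["labels"] = all_labels
    return tokenized
```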
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4832c46d69c0f42a62c6af8ccfb9b6bb46f2324d23dd612cf69faa839b423eb8
+ size 44493212
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8de2b6a7780b1ca9b38a0a34f6bd5e8003378e955a637b4ce4e3c97910f9f7f8
+ size 44498135
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ceb5f98e7b998ad8337ebed923c3ea142c9ee62b5bd132058e0fd660c83dd81
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0271a42487a5a379bf223d761cfc585dcba1a6069b012d2a634c31973187a24e
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "albert-base-v2",
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
trainer_state.json ADDED
@@ -0,0 +1,232 @@
+ {
+   "best_metric": 0.08603910356760025,
+   "best_model_checkpoint": "./models/albert-base-v2_1656839871.089586/checkpoint-9400",
+   "epoch": 3.0,
+   "global_step": 14100,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1,
+       "learning_rate": 1.9334751773049647e-05,
+       "loss": 0.2491,
+       "step": 469
+     },
+     {
+       "epoch": 0.2,
+       "learning_rate": 1.866950354609929e-05,
+       "loss": 0.1368,
+       "step": 938
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 1.8004255319148936e-05,
+       "loss": 0.1139,
+       "step": 1407
+     },
+     {
+       "epoch": 0.4,
+       "learning_rate": 1.7339007092198585e-05,
+       "loss": 0.1082,
+       "step": 1876
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 1.667375886524823e-05,
+       "loss": 0.0974,
+       "step": 2345
+     },
+     {
+       "epoch": 0.6,
+       "learning_rate": 1.6008510638297874e-05,
+       "loss": 0.0892,
+       "step": 2814
+     },
+     {
+       "epoch": 0.7,
+       "learning_rate": 1.534326241134752e-05,
+       "loss": 0.0941,
+       "step": 3283
+     },
+     {
+       "epoch": 0.8,
+       "learning_rate": 1.4678014184397164e-05,
+       "loss": 0.0826,
+       "step": 3752
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 1.401276595744681e-05,
+       "loss": 0.0806,
+       "step": 4221
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 1.3347517730496456e-05,
+       "loss": 0.082,
+       "step": 4690
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9746784684424369,
+       "eval_f1": 0.8338050592605697,
+       "eval_loss": 0.0911484956741333,
+       "eval_precision": 0.8302069572875386,
+       "eval_recall": 0.8374344852092032,
+       "eval_runtime": 37.0605,
+       "eval_samples_per_second": 255.771,
+       "eval_steps_per_second": 16.001,
+       "step": 4700
+     },
+     {
+       "epoch": 1.1,
+       "learning_rate": 1.26822695035461e-05,
+       "loss": 0.0686,
+       "step": 5159
+     },
+     {
+       "epoch": 1.2,
+       "learning_rate": 1.2017021276595745e-05,
+       "loss": 0.0645,
+       "step": 5628
+     },
+     {
+       "epoch": 1.3,
+       "learning_rate": 1.1351773049645392e-05,
+       "loss": 0.0638,
+       "step": 6097
+     },
+     {
+       "epoch": 1.4,
+       "learning_rate": 1.0686524822695037e-05,
+       "loss": 0.0606,
+       "step": 6566
+     },
+     {
+       "epoch": 1.5,
+       "learning_rate": 1.0021276595744682e-05,
+       "loss": 0.0616,
+       "step": 7035
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 9.356028368794327e-06,
+       "loss": 0.0579,
+       "step": 7504
+     },
+     {
+       "epoch": 1.7,
+       "learning_rate": 8.690780141843971e-06,
+       "loss": 0.0561,
+       "step": 7973
+     },
+     {
+       "epoch": 1.8,
+       "learning_rate": 8.025531914893618e-06,
+       "loss": 0.0565,
+       "step": 8442
+     },
+     {
+       "epoch": 1.9,
+       "learning_rate": 7.360283687943263e-06,
+       "loss": 0.0551,
+       "step": 8911
+     },
+     {
+       "epoch": 2.0,
+       "learning_rate": 6.695035460992908e-06,
+       "loss": 0.0592,
+       "step": 9380
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9760701501954842,
+       "eval_f1": 0.8429752066115702,
+       "eval_loss": 0.08603910356760025,
+       "eval_precision": 0.8525483783047152,
+       "eval_recall": 0.8336146397796926,
+       "eval_runtime": 36.5619,
+       "eval_samples_per_second": 259.259,
+       "eval_steps_per_second": 16.219,
+       "step": 9400
+     },
+     {
+       "epoch": 2.1,
+       "learning_rate": 6.029787234042554e-06,
+       "loss": 0.0406,
+       "step": 9849
+     },
+     {
+       "epoch": 2.2,
+       "learning_rate": 5.3645390070921985e-06,
+       "loss": 0.0387,
+       "step": 10318
+     },
+     {
+       "epoch": 2.3,
+       "learning_rate": 4.699290780141844e-06,
+       "loss": 0.0399,
+       "step": 10787
+     },
+     {
+       "epoch": 2.39,
+       "learning_rate": 4.03404255319149e-06,
+       "loss": 0.0407,
+       "step": 11256
+     },
+     {
+       "epoch": 2.49,
+       "learning_rate": 3.368794326241135e-06,
+       "loss": 0.0394,
+       "step": 11725
+     },
+     {
+       "epoch": 2.59,
+       "learning_rate": 2.70354609929078e-06,
+       "loss": 0.0369,
+       "step": 12194
+     },
+     {
+       "epoch": 2.69,
+       "learning_rate": 2.0382978723404254e-06,
+       "loss": 0.0408,
+       "step": 12663
+     },
+     {
+       "epoch": 2.79,
+       "learning_rate": 1.373049645390071e-06,
+       "loss": 0.0363,
+       "step": 13132
+     },
+     {
+       "epoch": 2.89,
+       "learning_rate": 7.078014184397164e-07,
+       "loss": 0.0392,
+       "step": 13601
+     },
+     {
+       "epoch": 2.99,
+       "learning_rate": 4.2553191489361707e-08,
+       "loss": 0.0392,
+       "step": 14070
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9780515276066022,
+       "eval_f1": 0.8619402985074628,
+       "eval_loss": 0.08666320890188217,
+       "eval_precision": 0.8620168813860506,
+       "eval_recall": 0.8618637292351425,
+       "eval_runtime": 36.7418,
+       "eval_samples_per_second": 257.989,
+       "eval_steps_per_second": 16.14,
+       "step": 14100
+     }
+   ],
+   "max_steps": 14100,
+   "num_train_epochs": 3,
+   "total_flos": 580572089788770.0,
+   "trial_name": null,
+   "trial_params": null
+ }
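
The trainer state above, together with the CONFIGS block in eval.log, pins down the run: 3 epochs at learning rate 2e-5 with per-device batch size 16, per-epoch evaluation and checkpointing, and the best checkpoint (eval loss 0.0860 at step 9400) reloaded at the end. A hedged reconstruction of those arguments, not the author's original script; only `output_dir` is illustrative:

```python
from transformers import TrainingArguments

# Values taken from eval.log's CONFIGS block and trainer_state.json above.
training_args = TrainingArguments(
    output_dir="./models/albert-base-v2-ner",  # illustrative path
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_total_limit=2,
    num_train_epochs=3,
    seed=1,
    load_best_model_at_end=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=469,
)
```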
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15c7912e3146ffbf0b4944c488054f98500ed7228181655063dab2a1d36f7001
+ size 3311