Amein-Eskinder committed
Commit e542267
1 Parent(s): b3d0035

Updated model files

.gitattributes CHANGED
@@ -2,34 +2,27 @@
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
  *.ftz filter=lfs diff=lfs merge=lfs -text
  *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
  *.onnx filter=lfs diff=lfs merge=lfs -text
  *.ot filter=lfs diff=lfs merge=lfs -text
  *.parquet filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
  *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
+ ---
+
+ tags:
+ - token-classification
+ datasets:
+ - djagatiya/ner-ontonotes-v5-eng-v4
+ widget:
+ - text: "On September 1st George won 1 dollar while watching Game of Thrones."
+ ---
+
+ # (NER) ALBERT-base-v2 : conll2012_ontonotesv5-english-v4
+
+ This `ALBERT-base-v2` NER model was fine-tuned on the `english-v4` configuration of the `conll2012_ontonotesv5` dataset. <br>
+ Check out the [NER-System Repository](https://github.com/djagatiya/NER-System) for more information.
+
+ ## Evaluation
+ - Precision: 86.20
+ - Recall: 86.18
+ - F1-Score: 86.19
+
+ > Check out the [eval.log](eval.log) file for the evaluation metrics and classification report.
+
+ ```
+               precision    recall  f1-score   support
+
+      CARDINAL       0.84      0.83      0.83       935
+          DATE       0.84      0.87      0.86      1602
+         EVENT       0.61      0.52      0.56        63
+           FAC       0.54      0.59      0.56       135
+           GPE       0.95      0.94      0.95      2240
+      LANGUAGE       0.85      0.50      0.63        22
+           LAW       0.56      0.57      0.57        40
+           LOC       0.61      0.65      0.63       179
+         MONEY       0.85      0.88      0.86       314
+          NORP       0.88      0.92      0.90       841
+       ORDINAL       0.78      0.86      0.81       195
+           ORG       0.84      0.81      0.82      1795
+       PERCENT       0.88      0.87      0.88       349
+        PERSON       0.94      0.92      0.93      1988
+       PRODUCT       0.57      0.53      0.55        76
+      QUANTITY       0.77      0.81      0.79       105
+          TIME       0.59      0.66      0.62       212
+   WORK_OF_ART       0.60      0.52      0.56       166
+
+     micro avg       0.86      0.86      0.86     11257
+     macro avg       0.75      0.74      0.74     11257
+  weighted avg       0.86      0.86      0.86     11257
+ ```
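
The README's widget sentence doubles as a quick smoke test. Below is a minimal usage sketch with the `transformers` pipeline API; the Hub ID is a placeholder, since this commit page does not state the published repository name.

```python
from transformers import pipeline

# Placeholder Hub ID -- substitute the actual repository this commit belongs to.
MODEL_ID = "your-username/albert-base-v2-ner-ontonotes"

# aggregation_strategy="simple" merges B-/I- subword predictions into whole entity spans.
ner = pipeline("token-classification", model=MODEL_ID, aggregation_strategy="simple")

for entity in ner("On September 1st George won 1 dollar while watching Game of Thrones."):
    print(entity["entity_group"], entity["word"], round(entity["score"], 3))
```

Given the metrics above, this sentence should come back with DATE ("September 1st"), PERSON ("George"), MONEY ("1 dollar"), and WORK_OF_ART ("Game of Thrones") spans.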
config.json ADDED
@@ -0,0 +1,111 @@
+ {
+   "_name_or_path": "albert-base-v2",
+   "architectures": [
+     "AlbertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
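
The `id2label` table above is what turns the classifier's 37 logits per token into BIO tags. A minimal sketch of that decoding, assuming the files from this commit live in a local directory (the path below is illustrative):

```python
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

CHECKPOINT = "./checkpoint-14100"  # illustrative local path to this commit's files

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT)

inputs = tokenizer("George watched Game of Thrones.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, seq_len, 37)

# argmax over the label dimension, then map ids -> BIO tags via the config
pred_ids = logits.argmax(dim=-1)[0].tolist()
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
for token, pred in zip(tokens, pred_ids):
    print(token, model.config.id2label[pred])
```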
eval.log ADDED
@@ -0,0 +1,234 @@
+ 2022-07-03 15:51:20,416 - __main__ - INFO - Label List:['O', 'B-PERSON', 'I-PERSON', 'B-NORP', 'I-NORP', 'B-FAC', 'I-FAC', 'B-ORG', 'I-ORG', 'B-GPE', 'I-GPE', 'B-LOC', 'I-LOC', 'B-PRODUCT', 'I-PRODUCT', 'B-DATE', 'I-DATE', 'B-TIME', 'I-TIME', 'B-PERCENT', 'I-PERCENT', 'B-MONEY', 'I-MONEY', 'B-QUANTITY', 'I-QUANTITY', 'B-ORDINAL', 'I-ORDINAL', 'B-CARDINAL', 'I-CARDINAL', 'B-EVENT', 'I-EVENT', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'B-LAW', 'I-LAW', 'B-LANGUAGE', 'I-LANGUAGE']
+ 2022-07-03 15:51:26,630 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 75187
+ })
+ 2022-07-03 15:51:27,367 - __main__ - INFO - Dataset({
+     features: ['id', 'words', 'ner_tags'],
+     num_rows: 9479
+ })
+ 2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/spiece.model. We won't load it.
+ 2022-07-03 15:51:27,370 - transformers.tokenization_utils_base - INFO - Didn't find file models/albert-base-v2_1656839871.089586/checkpoint-14100/added_tokens.json. We won't load it.
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer.json
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file None
+ 2022-07-03 15:51:27,371 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/special_tokens_map.json
+ 2022-07-03 15:51:27,372 - transformers.tokenization_utils_base - INFO - loading file models/albert-base-v2_1656839871.089586/checkpoint-14100/tokenizer_config.json
+ 2022-07-03 15:51:27,422 - __main__ - INFO - {'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}
+ 2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁what', '▁kind', '▁of', '▁memory', '▁', '?', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,422 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁', 'ww', '▁ii', '▁landmarks', '▁on', '▁the', '▁great', '▁earth', '▁of', '▁china', '▁', ':', '▁eternal', '▁memories', '▁of', '▁tai', 'hang', '▁mountain', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁standing', '▁tall', '▁on', '▁tai', 'hang', '▁mountain', '▁is', '▁the', '▁monument', '▁to', '▁the', '▁hundred', '▁regiments', '▁offensive', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁it', '▁is', '▁composed', '▁of', '▁a', '▁primary', '▁', 's', 'tele', '▁', ',', '▁secondary', '▁ste', 'les', '▁', ',', '▁a', '▁huge', '▁round', '▁sculpture', '▁and', '▁beacon', '▁tower', '▁', ',', '▁and', '▁the', '▁great', '▁wall', '▁', ',', '▁among', '▁other', '▁things', '▁', '.', '[SEP]']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - -------------
+ 2022-07-03 15:51:27,423 - __main__ - INFO - ['[CLS]', '▁we', '▁respectful', 'ly', '▁invite', '▁you', '▁to', '▁watch', '▁a', '▁special', '▁edition', '▁of', '▁across', '▁china', '▁', '.', '[SEP]', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
+ 2022-07-03 15:51:27,423 - __main__ - INFO - [None, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
+ 2022-07-03 15:51:27,427 - datasets.fingerprint - WARNING - Parameter 'function'=<function tokenize_and_align_labels at 0x7f8c9a20af70> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
+ 2022-07-03 15:51:32,943 - __main__ - INFO - {'id': [0, 1, 2, 3, 4], 'words': [['What', 'kind', 'of', 'memory', '?'], ['We', 'respectfully', 'invite', 'you', 'to', 'watch', 'a', 'special', 'edition', 'of', 'Across', 'China', '.'], ['WW', 'II', 'Landmarks', 'on', 'the', 'Great', 'Earth', 'of', 'China', ':', 'Eternal', 'Memories', 'of', 'Taihang', 'Mountain'], ['Standing', 'tall', 'on', 'Taihang', 'Mountain', 'is', 'the', 'Monument', 'to', 'the', 'Hundred', 'Regiments', 'Offensive', '.'], ['It', 'is', 'composed', 'of', 'a', 'primary', 'stele', ',', 'secondary', 'steles', ',', 'a', 'huge', 'round', 'sculpture', 'and', 'beacon', 'tower', ',', 'and', 'the', 'Great', 'Wall', ',', 'among', 'other', 'things', '.']], 'ner_tags': [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0], [31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], [0, 0, 0, 11, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 32, 0, 0, 0, 0, 0]], 'input_ids': [[2, 98, 825, 16, 1912, 13, 60, 3], [2, 95, 22719, 102, 10275, 42, 20, 1455, 21, 621, 1322, 16, 464, 998, 13, 9, 3], [2, 13, 14178, 595, 19045, 27, 14, 374, 1073, 16, 998, 13, 45, 10987, 4584, 16, 5466, 7065, 1286, 3], [2, 1288, 2263, 27, 5466, 7065, 1286, 25, 14, 4908, 20, 14, 1874, 12272, 4632, 13, 9, 3], [2, 32, 25, 1869, 16, 21, 1256, 13, 18, 14305, 13, 15, 2277, 6621, 1355, 13, 15, 21, 2329, 560, 5515, 17, 13339, 1710, 13, 15, 17, 14, 374, 769, 13, 15, 497, 89, 564, 13, 9, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 0, 0, 0, 0, 0, -100, -100], [-100, 0, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, -100, -100], [-100, 31, -100, 32, 32, 32, 32, 32, 32, 32, 32, 32, -100, 32, 32, 32, 32, -100, 32, -100], [-100, 0, 0, 0, 11, -100, 12, 0, 31, 32, 32, 32, 32, 32, 32, 0, -100, -100], [-100, 0, 0, 0, 0, 0, 0, 0, -100, -100, 0, -100, 0, 0, -100, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, -100, 0, 31, 32, 32, 0, -100, 0, 0, 0, 0, -100, -100]]}
+ 2022-07-03 15:51:35,822 - transformers.configuration_utils - INFO - loading configuration file models/albert-base-v2_1656839871.089586/checkpoint-14100/config.json
+ 2022-07-03 15:51:35,828 - transformers.configuration_utils - INFO - Model config AlbertConfig {
+   "_name_or_path": "models/albert-base-v2_1656839871.089586/checkpoint-14100",
+   "architectures": [
+     "AlbertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PERSON",
+     "2": "I-PERSON",
+     "3": "B-NORP",
+     "4": "I-NORP",
+     "5": "B-FAC",
+     "6": "I-FAC",
+     "7": "B-ORG",
+     "8": "I-ORG",
+     "9": "B-GPE",
+     "10": "I-GPE",
+     "11": "B-LOC",
+     "12": "I-LOC",
+     "13": "B-PRODUCT",
+     "14": "I-PRODUCT",
+     "15": "B-DATE",
+     "16": "I-DATE",
+     "17": "B-TIME",
+     "18": "I-TIME",
+     "19": "B-PERCENT",
+     "20": "I-PERCENT",
+     "21": "B-MONEY",
+     "22": "I-MONEY",
+     "23": "B-QUANTITY",
+     "24": "I-QUANTITY",
+     "25": "B-ORDINAL",
+     "26": "I-ORDINAL",
+     "27": "B-CARDINAL",
+     "28": "I-CARDINAL",
+     "29": "B-EVENT",
+     "30": "I-EVENT",
+     "31": "B-WORK_OF_ART",
+     "32": "I-WORK_OF_ART",
+     "33": "B-LAW",
+     "34": "I-LAW",
+     "35": "B-LANGUAGE",
+     "36": "I-LANGUAGE"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-CARDINAL": 27,
+     "B-DATE": 15,
+     "B-EVENT": 29,
+     "B-FAC": 5,
+     "B-GPE": 9,
+     "B-LANGUAGE": 35,
+     "B-LAW": 33,
+     "B-LOC": 11,
+     "B-MONEY": 21,
+     "B-NORP": 3,
+     "B-ORDINAL": 25,
+     "B-ORG": 7,
+     "B-PERCENT": 19,
+     "B-PERSON": 1,
+     "B-PRODUCT": 13,
+     "B-QUANTITY": 23,
+     "B-TIME": 17,
+     "B-WORK_OF_ART": 31,
+     "I-CARDINAL": 28,
+     "I-DATE": 16,
+     "I-EVENT": 30,
+     "I-FAC": 6,
+     "I-GPE": 10,
+     "I-LANGUAGE": 36,
+     "I-LAW": 34,
+     "I-LOC": 12,
+     "I-MONEY": 22,
+     "I-NORP": 4,
+     "I-ORDINAL": 26,
+     "I-ORG": 8,
+     "I-PERCENT": 20,
+     "I-PERSON": 2,
+     "I-PRODUCT": 14,
+     "I-QUANTITY": 24,
+     "I-TIME": 18,
+     "I-WORK_OF_ART": 32,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
+
+ 2022-07-03 15:51:35,912 - transformers.modeling_utils - INFO - loading weights file models/albert-base-v2_1656839871.089586/checkpoint-14100/pytorch_model.bin
+ 2022-07-03 15:51:36,021 - transformers.modeling_utils - INFO - All model checkpoint weights were used when initializing AlbertForTokenClassification.
+
+ 2022-07-03 15:51:36,022 - transformers.modeling_utils - INFO - All the weights of AlbertForTokenClassification were initialized from the model checkpoint at models/albert-base-v2_1656839871.089586/checkpoint-14100.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use AlbertForTokenClassification for predictions without further training.
+ 2022-07-03 15:51:36,022 - __main__ - INFO - AlbertForTokenClassification(
+   (albert): AlbertModel(
+     (embeddings): AlbertEmbeddings(
+       (word_embeddings): Embedding(30000, 128, padding_idx=0)
+       (position_embeddings): Embedding(512, 128)
+       (token_type_embeddings): Embedding(2, 128)
+       (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
+       (dropout): Dropout(p=0, inplace=False)
+     )
+     (encoder): AlbertTransformer(
+       (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
+       (albert_layer_groups): ModuleList(
+         (0): AlbertLayerGroup(
+           (albert_layers): ModuleList(
+             (0): AlbertLayer(
+               (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+               (attention): AlbertAttention(
+                 (query): Linear(in_features=768, out_features=768, bias=True)
+                 (key): Linear(in_features=768, out_features=768, bias=True)
+                 (value): Linear(in_features=768, out_features=768, bias=True)
+                 (attention_dropout): Dropout(p=0, inplace=False)
+                 (output_dropout): Dropout(p=0, inplace=False)
+                 (dense): Linear(in_features=768, out_features=768, bias=True)
+                 (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+               )
+               (ffn): Linear(in_features=768, out_features=3072, bias=True)
+               (ffn_output): Linear(in_features=3072, out_features=768, bias=True)
+               (activation): NewGELUActivation()
+               (dropout): Dropout(p=0, inplace=False)
+             )
+           )
+         )
+       )
+     )
+   )
+   (dropout): Dropout(p=0.1, inplace=False)
+   (classifier): Linear(in_features=768, out_features=37, bias=True)
+ )
+ 2022-07-03 15:51:36,022 - __main__ - INFO - CONFIGS:{
+   "output_dir": "./models/finetuned-base-uncased_1656843680.4141676",
+   "per_device_train_batch_size": 16,
+   "per_device_eval_batch_size": 16,
+   "save_total_limit": 2,
+   "num_train_epochs": 3,
+   "seed": 1,
+   "load_best_model_at_end": true,
+   "evaluation_strategy": "epoch",
+   "save_strategy": "epoch",
+   "learning_rate": 2e-05,
+   "weight_decay": 0.01,
+   "logging_steps": 469.0
+ }
+ 2022-07-03 15:51:36,023 - transformers.training_args - INFO - PyTorch: setting up devices
+ 2022-07-03 15:51:36,070 - transformers.training_args - INFO - The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
+ 2022-07-03 15:51:36,075 - __main__ - INFO - [[ MODEL EVALUATION ]]
+ 2022-07-03 15:51:36,075 - transformers.trainer - INFO - The following columns in the evaluation set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
+ 2022-07-03 15:51:36,077 - transformers.trainer - INFO - ***** Running Evaluation *****
+ 2022-07-03 15:51:36,077 - transformers.trainer - INFO - Num examples = 9479
+ 2022-07-03 15:51:36,078 - transformers.trainer - INFO - Batch size = 16
+ 2022-07-03 16:02:02,467 - __main__ - INFO - {'eval_loss': 0.08666322380304337, 'eval_precision': 0.8620168813860506, 'eval_recall': 0.8618637292351425, 'eval_f1': 0.8619402985074628, 'eval_accuracy': 0.9780515276066022, 'eval_runtime': 626.3804, 'eval_samples_per_second': 15.133, 'eval_steps_per_second': 0.947, 'step': 0}
+ 2022-07-03 16:02:02,468 - transformers.trainer - INFO - The following columns in the test set don't have a corresponding argument in `AlbertForTokenClassification.forward` and have been ignored: id, words, ner_tags. If id, words, ner_tags are not expected by `AlbertForTokenClassification.forward`, you can safely ignore this message.
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - ***** Running Prediction *****
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - Num examples = 9479
+ 2022-07-03 16:02:02,471 - transformers.trainer - INFO - Batch size = 16
+ 2022-07-03 16:12:35,933 - __main__ - INFO -               precision    recall  f1-score   support
+
+      CARDINAL       0.84      0.83      0.83       935
+          DATE       0.84      0.87      0.86      1602
+         EVENT       0.61      0.52      0.56        63
+           FAC       0.54      0.59      0.56       135
+           GPE       0.95      0.94      0.95      2240
+      LANGUAGE       0.85      0.50      0.63        22
+           LAW       0.56      0.57      0.57        40
+           LOC       0.61      0.65      0.63       179
+         MONEY       0.85      0.88      0.86       314
+          NORP       0.88      0.92      0.90       841
+       ORDINAL       0.78      0.86      0.81       195
+           ORG       0.84      0.81      0.82      1795
+       PERCENT       0.88      0.87      0.88       349
+        PERSON       0.94      0.92      0.93      1988
+       PRODUCT       0.57      0.53      0.55        76
+      QUANTITY       0.77      0.81      0.79       105
+          TIME       0.59      0.66      0.62       212
+   WORK_OF_ART       0.60      0.52      0.56       166
+
+     micro avg       0.86      0.86      0.86     11257
+     macro avg       0.75      0.74      0.74     11257
+  weighted avg       0.86      0.86      0.86     11257
+
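
The `labels` arrays in the log above (with `-100` at `[CLS]`/`[SEP]`, padding, and subword continuations) imply the usual label-alignment step for token classification. Here is a reconstruction of the `tokenize_and_align_labels` function named in the log; the body is a sketch of the standard technique, not the author's code:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

def tokenize_and_align_labels(examples):
    """Align word-level NER tags with subword tokens; -100 is ignored by the loss."""
    tokenized = tokenizer(examples["words"], truncation=True, is_split_into_words=True)
    all_labels = []
    for i, word_labels in enumerate(examples["ner_tags"]):
        word_ids = tokenized.word_ids(batch_index=i)
        previous = None
        labels = []
        for word_id in word_ids:
            if word_id is None:
                labels.append(-100)                  # [CLS], [SEP], padding
            elif word_id != previous:
                labels.append(word_labels[word_id])  # first piece of a word
            else:
                labels.append(-100)                  # subword continuation, masked out
            previous = word_id
        all_labels.append(labels)
    tokenized["labels"] = all_labels
    return tokenized
```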
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4832c46d69c0f42a62c6af8ccfb9b6bb46f2324d23dd612cf69faa839b423eb8
+ size 44493212
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8de2b6a7780b1ca9b38a0a34f6bd5e8003378e955a637b4ce4e3c97910f9f7f8
+ size 44498135
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ceb5f98e7b998ad8337ebed923c3ea142c9ee62b5bd132058e0fd660c83dd81
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0271a42487a5a379bf223d761cfc585dcba1a6069b012d2a634c31973187a24e
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "albert-base-v2",
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
trainer_state.json ADDED
@@ -0,0 +1,232 @@
+ {
+   "best_metric": 0.08603910356760025,
+   "best_model_checkpoint": "./models/albert-base-v2_1656839871.089586/checkpoint-9400",
+   "epoch": 3.0,
+   "global_step": 14100,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1,
+       "learning_rate": 1.9334751773049647e-05,
+       "loss": 0.2491,
+       "step": 469
+     },
+     {
+       "epoch": 0.2,
+       "learning_rate": 1.866950354609929e-05,
+       "loss": 0.1368,
+       "step": 938
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 1.8004255319148936e-05,
+       "loss": 0.1139,
+       "step": 1407
+     },
+     {
+       "epoch": 0.4,
+       "learning_rate": 1.7339007092198585e-05,
+       "loss": 0.1082,
+       "step": 1876
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 1.667375886524823e-05,
+       "loss": 0.0974,
+       "step": 2345
+     },
+     {
+       "epoch": 0.6,
+       "learning_rate": 1.6008510638297874e-05,
+       "loss": 0.0892,
+       "step": 2814
+     },
+     {
+       "epoch": 0.7,
+       "learning_rate": 1.534326241134752e-05,
+       "loss": 0.0941,
+       "step": 3283
+     },
+     {
+       "epoch": 0.8,
+       "learning_rate": 1.4678014184397164e-05,
+       "loss": 0.0826,
+       "step": 3752
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 1.401276595744681e-05,
+       "loss": 0.0806,
+       "step": 4221
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 1.3347517730496456e-05,
+       "loss": 0.082,
+       "step": 4690
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9746784684424369,
+       "eval_f1": 0.8338050592605697,
+       "eval_loss": 0.0911484956741333,
+       "eval_precision": 0.8302069572875386,
+       "eval_recall": 0.8374344852092032,
+       "eval_runtime": 37.0605,
+       "eval_samples_per_second": 255.771,
+       "eval_steps_per_second": 16.001,
+       "step": 4700
+     },
+     {
+       "epoch": 1.1,
+       "learning_rate": 1.26822695035461e-05,
+       "loss": 0.0686,
+       "step": 5159
+     },
+     {
+       "epoch": 1.2,
+       "learning_rate": 1.2017021276595745e-05,
+       "loss": 0.0645,
+       "step": 5628
+     },
+     {
+       "epoch": 1.3,
+       "learning_rate": 1.1351773049645392e-05,
+       "loss": 0.0638,
+       "step": 6097
+     },
+     {
+       "epoch": 1.4,
+       "learning_rate": 1.0686524822695037e-05,
+       "loss": 0.0606,
+       "step": 6566
+     },
+     {
+       "epoch": 1.5,
+       "learning_rate": 1.0021276595744682e-05,
+       "loss": 0.0616,
+       "step": 7035
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 9.356028368794327e-06,
+       "loss": 0.0579,
+       "step": 7504
+     },
+     {
+       "epoch": 1.7,
+       "learning_rate": 8.690780141843971e-06,
+       "loss": 0.0561,
+       "step": 7973
+     },
+     {
+       "epoch": 1.8,
+       "learning_rate": 8.025531914893618e-06,
+       "loss": 0.0565,
+       "step": 8442
+     },
+     {
+       "epoch": 1.9,
+       "learning_rate": 7.360283687943263e-06,
+       "loss": 0.0551,
+       "step": 8911
+     },
+     {
+       "epoch": 2.0,
+       "learning_rate": 6.695035460992908e-06,
+       "loss": 0.0592,
+       "step": 9380
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9760701501954842,
+       "eval_f1": 0.8429752066115702,
+       "eval_loss": 0.08603910356760025,
+       "eval_precision": 0.8525483783047152,
+       "eval_recall": 0.8336146397796926,
+       "eval_runtime": 36.5619,
+       "eval_samples_per_second": 259.259,
+       "eval_steps_per_second": 16.219,
+       "step": 9400
+     },
+     {
+       "epoch": 2.1,
+       "learning_rate": 6.029787234042554e-06,
+       "loss": 0.0406,
+       "step": 9849
+     },
+     {
+       "epoch": 2.2,
+       "learning_rate": 5.3645390070921985e-06,
+       "loss": 0.0387,
+       "step": 10318
+     },
+     {
+       "epoch": 2.3,
+       "learning_rate": 4.699290780141844e-06,
+       "loss": 0.0399,
+       "step": 10787
+     },
+     {
+       "epoch": 2.39,
+       "learning_rate": 4.03404255319149e-06,
+       "loss": 0.0407,
+       "step": 11256
+     },
+     {
+       "epoch": 2.49,
+       "learning_rate": 3.368794326241135e-06,
+       "loss": 0.0394,
+       "step": 11725
+     },
+     {
+       "epoch": 2.59,
+       "learning_rate": 2.70354609929078e-06,
+       "loss": 0.0369,
+       "step": 12194
+     },
+     {
+       "epoch": 2.69,
+       "learning_rate": 2.0382978723404254e-06,
+       "loss": 0.0408,
+       "step": 12663
+     },
+     {
+       "epoch": 2.79,
+       "learning_rate": 1.373049645390071e-06,
+       "loss": 0.0363,
+       "step": 13132
+     },
+     {
+       "epoch": 2.89,
+       "learning_rate": 7.078014184397164e-07,
+       "loss": 0.0392,
+       "step": 13601
+     },
+     {
+       "epoch": 2.99,
+       "learning_rate": 4.2553191489361707e-08,
+       "loss": 0.0392,
+       "step": 14070
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9780515276066022,
+       "eval_f1": 0.8619402985074628,
+       "eval_loss": 0.08666320890188217,
+       "eval_precision": 0.8620168813860506,
+       "eval_recall": 0.8618637292351425,
+       "eval_runtime": 36.7418,
+       "eval_samples_per_second": 257.989,
+       "eval_steps_per_second": 16.14,
+       "step": 14100
+     }
+   ],
+   "max_steps": 14100,
+   "num_train_epochs": 3,
+   "total_flos": 580572089788770.0,
+   "trial_name": null,
+   "trial_params": null
+ }
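
The trainer state above, together with the CONFIGS block in eval.log, pins down the run: 3 epochs at learning rate 2e-5 with per-device batch size 16, per-epoch evaluation and checkpointing, and the best checkpoint (eval loss 0.0860 at step 9400) reloaded at the end. A hedged reconstruction of those arguments, not the author's original script; only `output_dir` is illustrative:

```python
from transformers import TrainingArguments

# Values taken from eval.log's CONFIGS block and trainer_state.json above.
training_args = TrainingArguments(
    output_dir="./models/albert-base-v2-ner",  # illustrative path
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_total_limit=2,
    num_train_epochs=3,
    seed=1,
    load_best_model_at_end=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=469,
)
```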
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15c7912e3146ffbf0b4944c488054f98500ed7228181655063dab2a1d36f7001
+ size 3311