sunilregmi committed
Commit 1ab791b
1 Parent(s): a214c17

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. checkpoint-1353-epoch-3/config.json +44 -0
  2. checkpoint-1353-epoch-3/model.safetensors +3 -0
  3. checkpoint-1353-epoch-3/model_args.json +1 -0
  4. checkpoint-1353-epoch-3/optimizer.pt +3 -0
  5. checkpoint-1353-epoch-3/scheduler.pt +3 -0
  6. checkpoint-1353-epoch-3/special_tokens_map.json +7 -0
  7. checkpoint-1353-epoch-3/tokenizer_config.json +58 -0
  8. checkpoint-1353-epoch-3/training_args.bin +3 -0
  9. checkpoint-1353-epoch-3/vocab.txt +0 -0
  10. checkpoint-1804-epoch-4/config.json +44 -0
  11. checkpoint-1804-epoch-4/model.safetensors +3 -0
  12. checkpoint-1804-epoch-4/model_args.json +1 -0
  13. checkpoint-1804-epoch-4/optimizer.pt +3 -0
  14. checkpoint-1804-epoch-4/scheduler.pt +3 -0
  15. checkpoint-1804-epoch-4/special_tokens_map.json +7 -0
  16. checkpoint-1804-epoch-4/tokenizer_config.json +58 -0
  17. checkpoint-1804-epoch-4/training_args.bin +3 -0
  18. checkpoint-1804-epoch-4/vocab.txt +0 -0
  19. checkpoint-2000/config.json +166 -0
  20. checkpoint-2000/model.safetensors +3 -0
  21. checkpoint-2000/model_args.json +1 -0
  22. checkpoint-2000/optimizer.pt +3 -0
  23. checkpoint-2000/scheduler.pt +3 -0
  24. checkpoint-2000/special_tokens_map.json +7 -0
  25. checkpoint-2000/tokenizer_config.json +58 -0
  26. checkpoint-2000/training_args.bin +3 -0
  27. checkpoint-2000/vocab.txt +0 -0
  28. checkpoint-2255-epoch-5/config.json +44 -0
  29. checkpoint-2255-epoch-5/model.safetensors +3 -0
  30. checkpoint-2255-epoch-5/model_args.json +1 -0
  31. checkpoint-2255-epoch-5/optimizer.pt +3 -0
  32. checkpoint-2255-epoch-5/scheduler.pt +3 -0
  33. checkpoint-2255-epoch-5/special_tokens_map.json +7 -0
  34. checkpoint-2255-epoch-5/tokenizer_config.json +58 -0
  35. checkpoint-2255-epoch-5/training_args.bin +3 -0
  36. checkpoint-2255-epoch-5/vocab.txt +0 -0
  37. checkpoint-451-epoch-1/config.json +44 -0
  38. checkpoint-451-epoch-1/model.safetensors +3 -0
  39. checkpoint-451-epoch-1/model_args.json +1 -0
  40. checkpoint-451-epoch-1/optimizer.pt +3 -0
  41. checkpoint-451-epoch-1/scheduler.pt +3 -0
  42. checkpoint-451-epoch-1/special_tokens_map.json +7 -0
  43. checkpoint-451-epoch-1/tokenizer_config.json +58 -0
  44. checkpoint-451-epoch-1/training_args.bin +3 -0
  45. checkpoint-451-epoch-1/vocab.txt +0 -0
  46. checkpoint-902-epoch-2/config.json +44 -0
  47. checkpoint-902-epoch-2/model.safetensors +3 -0
  48. checkpoint-902-epoch-2/model_args.json +1 -0
  49. checkpoint-902-epoch-2/optimizer.pt +3 -0
  50. checkpoint-902-epoch-2/scheduler.pt +3 -0
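The commit message says this folder was uploaded with huggingface_hub. As a minimal sketch, such an upload is typically done via HfApi.upload_folder; the repo id and local folder below are assumptions, not values shown in this commit:

```python
from huggingface_hub import HfApi

api = HfApi()
# repo_id and folder_path are hypothetical; the actual values are not shown here.
api.upload_folder(
    folder_path="outputs/",
    repo_id="sunilregmi/nepali-token-classification",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```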
checkpoint-1353-epoch-3/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "B-LOC",
+     "1": "O",
+     "2": "I-LOC",
+     "3": "B-PER",
+     "4": "B-ORG",
+     "5": "I-PER",
+     "6": "I-ORG"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-LOC": 0,
+     "B-ORG": 4,
+     "B-PER": 3,
+     "I-LOC": 2,
+     "I-ORG": 6,
+     "I-PER": 5,
+     "O": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
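This is a standard transformers BertForTokenClassification config, so the checkpoint directory should load with the Auto classes. A minimal sketch, assuming the directory has been downloaded locally under this name:

```python
from transformers import AutoModelForTokenClassification, AutoTokenizer

checkpoint_dir = "checkpoint-1353-epoch-3"  # assumed local path

tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForTokenClassification.from_pretrained(checkpoint_dir)

# id2label from config.json maps the 7 class indices to NER tags.
print(model.config.id2label)  # {0: 'B-LOC', 1: 'O', 2: 'I-LOC', ...}
```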
checkpoint-1353-epoch-3/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8f049ab429a4697e96a055432d6ea4e786a05c8ae4b4f4be74545cfabc48ec4
+ size 435611468
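The three lines above are a Git LFS pointer, not the weights themselves: the file records only the spec version, the object's SHA-256, and its byte size. A small sketch that parses such a pointer:

```python
def parse_lfs_pointer(text: str) -> dict[str, str]:
    """Split a Git LFS pointer file into its key/value fields."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:b8f049ab429a4697e96a055432d6ea4e786a05c8ae4b4f4be74545cfabc48ec4
size 435611468"""

fields = parse_lfs_pointer(pointer)
print(fields["size"])  # 435611468 bytes (~435 MB of weights)
```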
checkpoint-1353-epoch-3/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
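These arguments follow the simpletransformers NERModel API (note "model_class": "NERModel"). As a hedged sketch of how a run with these hyperparameters might be configured; the training DataFrame is an assumption:

```python
from simpletransformers.ner import NERModel, NERArgs

# Mirror the key values recorded in model_args.json.
args = NERArgs()
args.num_train_epochs = 5
args.learning_rate = 1e-4
args.train_batch_size = 8
args.max_seq_length = 128
args.fp16 = True
args.save_model_every_epoch = True

labels = ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"]
model = NERModel("bert", "NepBERTa/NepBERTa", labels=labels, args=args)

# train_df is hypothetical: a DataFrame with sentence_id, words, labels columns.
# model.train_model(train_df)
```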
checkpoint-1353-epoch-3/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93cf90f57c772bed1ab0694b1c61b20d70073420f70bdec3399b40bc61e309e1
+ size 871342650
checkpoint-1353-epoch-3/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e05079cc1ad0c8611bb08b3ea7663fa61e96f7b77dad09026711f1a6b0e9aa6
+ size 1064
checkpoint-1353-epoch-3/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoint-1353-epoch-3/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "from_tf": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
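Per added_tokens_decoder above, this vocabulary places the special tokens at ids 0–4 ([CLS], [PAD], [SEP], [MASK], [UNK]), unlike the usual bert-base layout. A quick sanity-check sketch, assuming the checkpoint directory is available locally:

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("checkpoint-1353-epoch-3")  # assumed path
print(tokenizer.cls_token_id, tokenizer.pad_token_id, tokenizer.sep_token_id)
# Expected: 0 1 2, matching added_tokens_decoder in this file.
```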
checkpoint-1353-epoch-3/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87dc53d5ea23f82a753872c573fd1e5400357b6a8bb49ae88d5e9d1783129665
+ size 3512
checkpoint-1353-epoch-3/vocab.txt ADDED
The diff for this file is too large to render.
checkpoint-1804-epoch-4/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "B-LOC",
+     "1": "O",
+     "2": "I-LOC",
+     "3": "B-PER",
+     "4": "B-ORG",
+     "5": "I-PER",
+     "6": "I-ORG"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-LOC": 0,
+     "B-ORG": 4,
+     "B-PER": 3,
+     "I-LOC": 2,
+     "I-ORG": 6,
+     "I-PER": 5,
+     "O": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
checkpoint-1804-epoch-4/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7740f7f628b3e30a0de4bf85ac6cd00ccd11efc9c23d33d28cd3d8f46fbf359b
+ size 435611468
checkpoint-1804-epoch-4/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
checkpoint-1804-epoch-4/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c0a7915473e01e3705582ee655879bec855dbbcb7c2c807c8358693563d60e6
+ size 871342650
checkpoint-1804-epoch-4/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92e14973499563b2c2125c4e45c313274d3ed56f9182ff998b322e9b680deb1b
+ size 1064
checkpoint-1804-epoch-4/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoint-1804-epoch-4/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "from_tf": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoint-1804-epoch-4/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87dc53d5ea23f82a753872c573fd1e5400357b6a8bb49ae88d5e9d1783129665
+ size 3512
checkpoint-1804-epoch-4/vocab.txt ADDED
The diff for this file is too large to render.
checkpoint-2000/config.json ADDED
@@ -0,0 +1,166 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "NN",
+     "1": "IKO",
+     "2": "FB",
+     "3": "II",
+     "4": "JX",
+     "5": "CC",
+     "6": "IKM",
+     "7": "VVYN1",
+     "8": "YF",
+     "9": "DDX",
+     "10": "IE",
+     "11": "NP",
+     "12": "VQ",
+     "13": "VE",
+     "14": "IA",
+     "15": "VVYX2",
+     "16": "DJX",
+     "17": "TT",
+     "18": "VI",
+     "19": "RR",
+     "20": "MM",
+     "21": "VDX",
+     "22": "RD",
+     "23": "IKF",
+     "24": "VN",
+     "25": "IKX",
+     "26": "MLO",
+     "27": "IH",
+     "28": "PXH",
+     "29": "JM",
+     "30": "PRF",
+     "31": "YM",
+     "32": "MOX",
+     "33": "MOM",
+     "34": "PMXKO",
+     "35": "JO",
+     "36": "DKM",
+     "37": "CSB",
+     "38": "VVMX1",
+     "39": "DKX",
+     "40": "PRFKO",
+     "41": "PRFKX",
+     "42": "DDM",
+     "43": "PMXKM",
+     "44": "PMX",
+     "45": "VVMX2",
+     "46": "RK",
+     "47": "VDO",
+     "48": "MLX",
+     "49": "DDO",
+     "50": "PRFKM",
+     "51": "FZ",
+     "52": "PTH",
+     "53": "DGX",
+     "54": "VOYN1",
+     "55": "VCM",
+     "56": "VVYM1F",
+     "57": "DGM",
+     "58": "RJ",
+     "59": "VDM",
+     "60": "FF",
+     "61": "VR",
+     "62": "PMXKX",
+     "63": "DKO",
+     "64": "DGO",
+     "65": "VOMX2",
+     "66": "VVYN1F",
+     "67": "MLF"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "CC": 5,
+     "CSB": 37,
+     "DDM": 42,
+     "DDO": 49,
+     "DDX": 9,
+     "DGM": 57,
+     "DGO": 64,
+     "DGX": 53,
+     "DJX": 16,
+     "DKM": 36,
+     "DKO": 63,
+     "DKX": 39,
+     "FB": 2,
+     "FF": 60,
+     "FZ": 51,
+     "IA": 14,
+     "IE": 10,
+     "IH": 27,
+     "II": 3,
+     "IKF": 23,
+     "IKM": 6,
+     "IKO": 1,
+     "IKX": 25,
+     "JM": 29,
+     "JO": 35,
+     "JX": 4,
+     "MLF": 67,
+     "MLO": 26,
+     "MLX": 48,
+     "MM": 20,
+     "MOM": 33,
+     "MOX": 32,
+     "NN": 0,
+     "NP": 11,
+     "PMX": 44,
+     "PMXKM": 43,
+     "PMXKO": 34,
+     "PMXKX": 62,
+     "PRF": 30,
+     "PRFKM": 50,
+     "PRFKO": 40,
+     "PRFKX": 41,
+     "PTH": 52,
+     "PXH": 28,
+     "RD": 22,
+     "RJ": 58,
+     "RK": 46,
+     "RR": 19,
+     "TT": 17,
+     "VCM": 55,
+     "VDM": 59,
+     "VDO": 47,
+     "VDX": 21,
+     "VE": 13,
+     "VI": 18,
+     "VN": 24,
+     "VOMX2": 65,
+     "VOYN1": 54,
+     "VQ": 12,
+     "VR": 61,
+     "VVMX1": 38,
+     "VVMX2": 45,
+     "VVYM1F": 56,
+     "VVYN1": 7,
+     "VVYN1F": 66,
+     "VVYX2": 15,
+     "YF": 8,
+     "YM": 31
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
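Unlike the epoch checkpoints, checkpoint-2000 carries a different, much larger label set (68 tags that look like a Nepali POS tagset) rather than the 7 NER tags. id2label and label2id must stay mutual inverses; a small consistency-check sketch, assuming the config is available locally:

```python
import json

with open("checkpoint-2000/config.json") as f:  # assumed local path
    config = json.load(f)

id2label = {int(i): tag for i, tag in config["id2label"].items()}
label2id = config["label2id"]

# Every id->tag pair must round-trip through label2id.
assert all(label2id[tag] == i for i, tag in id2label.items())
print(f"{len(id2label)} labels consistent")  # 68 labels consistent
```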
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c88d77cd0236bd6f04a6ba8c775c0c888c8d72fad518c18773d91426adc156b9
+ size 435799104
checkpoint-2000/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": false, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["NN", "IKO", "FB", "II", "JX", "CC", "IKM", "VVYN1", "YF", "DDX", "IE", "NP", "VQ", "VE", "IA", "VVYX2", "DJX", "TT", "VI", "RR", "MM", "VDX", "RD", "IKF", "VN", "IKX", "MLO", "IH", "PXH", "JM", "PRF", "YM", "MOX", "MOM", "PMXKO", "JO", "DKM", "CSB", "VVMX1", "DKX", "PRFKO", "PRFKX", "DDM", "PMXKM", "PMX", "VVMX2", "RK", "VDO", "MLX", "DDO", "PRFKM", "FZ", "PTH", "DGX", "VOYN1", "VCM", "VVYM1F", "DGM", "RJ", "VDM", "FF", "VR", "PMXKX", "DKO", "DGO", "VOMX2", "VVYN1F", "MLF"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:533200c36022eb9c7320d0d3f900fc61fa4dbdef3e5012a16cdb7142fa499487
+ size 871717946
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b87825cd4df87507ff16a4fbca7645bf193e5af1fcaf0d1f733f49fad487816
+ size 1064
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "from_tf": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b6ba489767cb2d1e6600114d94ad533d3938d6c743fdd4d2e3a3d2ff0c9a46d
+ size 4152
checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render.
checkpoint-2255-epoch-5/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "B-LOC",
+     "1": "O",
+     "2": "I-LOC",
+     "3": "B-PER",
+     "4": "B-ORG",
+     "5": "I-PER",
+     "6": "I-ORG"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-LOC": 0,
+     "B-ORG": 4,
+     "B-PER": 3,
+     "I-LOC": 2,
+     "I-ORG": 6,
+     "I-PER": 5,
+     "O": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
checkpoint-2255-epoch-5/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebaaaa113701bac2fa50c292694fbf4fd3fd3d84bd981f984a771ed2a04f719f
+ size 435611468
checkpoint-2255-epoch-5/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
checkpoint-2255-epoch-5/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45d1e276d4d9feb5d03305eb6c953b0f1d74459358cef1d4924464606bd7546c
+ size 871342650
checkpoint-2255-epoch-5/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3968933a8b2f9f50f0e634e985cbd5278e68dd275ee6bd8021f5151f0795cbe6
+ size 1064
checkpoint-2255-epoch-5/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoint-2255-epoch-5/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "from_tf": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoint-2255-epoch-5/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87dc53d5ea23f82a753872c573fd1e5400357b6a8bb49ae88d5e9d1783129665
+ size 3512
checkpoint-2255-epoch-5/vocab.txt ADDED
The diff for this file is too large to render.
checkpoint-451-epoch-1/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "B-LOC",
+     "1": "O",
+     "2": "I-LOC",
+     "3": "B-PER",
+     "4": "B-ORG",
+     "5": "I-PER",
+     "6": "I-ORG"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-LOC": 0,
+     "B-ORG": 4,
+     "B-PER": 3,
+     "I-LOC": 2,
+     "I-ORG": 6,
+     "I-PER": 5,
+     "O": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
checkpoint-451-epoch-1/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c292f5db6d8160c07db3d74bc173ed8d36b9bd3ef12eae1859c36291f686f5c5
+ size 435611468
checkpoint-451-epoch-1/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
checkpoint-451-epoch-1/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:775c1ed59de436294c40b3e2f0cc1761f9126bc62cdb2bb1cd0ba1d462f1bd62
+ size 871342650
checkpoint-451-epoch-1/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6fa5964fd47cb197a7e053fced6cb6deb04817917c01ce818917cc62dd39a1e
+ size 1064
checkpoint-451-epoch-1/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoint-451-epoch-1/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "from_tf": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoint-451-epoch-1/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87dc53d5ea23f82a753872c573fd1e5400357b6a8bb49ae88d5e9d1783129665
+ size 3512
checkpoint-451-epoch-1/vocab.txt ADDED
The diff for this file is too large to render.
checkpoint-902-epoch-2/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "NepBERTa/NepBERTa",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "B-LOC",
+     "1": "O",
+     "2": "I-LOC",
+     "3": "B-PER",
+     "4": "B-ORG",
+     "5": "I-PER",
+     "6": "I-ORG"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-LOC": 0,
+     "B-ORG": 4,
+     "B-PER": 3,
+     "I-LOC": 2,
+     "I-ORG": 6,
+     "I-PER": 5,
+     "O": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_hidden_state": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
checkpoint-902-epoch-2/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48bbdf90fdd6325828cb3cd00f8430e5e88dc3d609b0444a0f8f2c2340b74d2b
+ size 435611468
checkpoint-902-epoch-2/model_args.json ADDED
@@ -0,0 +1 @@
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 0.0001, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "NepBERTa/NepBERTa", "model_type": "bert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 136, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["B-LOC", "O", "I-LOC", "B-PER", "B-ORG", "I-PER", "I-ORG"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
checkpoint-902-epoch-2/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71365293e6299304b928b74c83035acb0a489379183f7677334b0e9c229a1d31
+ size 871342650
checkpoint-902-epoch-2/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:96f86d24852ba7173b3eede3843cc590412d49b970261ec0571103597d51a4a1
+ size 1064