temporary0-0name committed on
Commit
6e4a371
1 Parent(s): 97dd670

Training in progress, step 100

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: temporary0-0name/run_opt
4
  tags:
5
  - generated_from_trainer
6
  datasets:
@@ -15,7 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # run_opt
17
 
18
- This model is a fine-tuned version of [temporary0-0name/run_opt](https://huggingface.co/temporary0-0name/run_opt) on the wikitext dataset.
 
 
19
 
20
  ## Model description
21
 
@@ -43,18 +45,35 @@ The following hyperparameters were used during training:
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: cosine
45
  - lr_scheduler_warmup_steps: 100
46
- - num_epochs: 1
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
- | No log | 1.0 | 1 | 10.4150 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
  ### Framework versions
56
 
57
- - Transformers 4.35.2
58
- - Pytorch 2.1.0+cu118
59
- - Datasets 2.15.0
60
- - Tokenizers 0.15.0
 
1
  ---
2
  license: apache-2.0
3
+ base_model: bert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
 
16
  # run_opt
17
 
18
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the wikitext dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0107
21
 
22
  ## Model description
23
 
 
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_steps: 100
48
+ - num_epochs: 10
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | 7.6252 | 0.55 | 100 | 6.4113 |
55
+ | 4.839 | 1.1 | 200 | 2.0385 |
56
+ | 0.9137 | 1.65 | 300 | 0.3108 |
57
+ | 0.171 | 2.2 | 400 | 0.0877 |
58
+ | 0.0542 | 2.75 | 500 | 0.0396 |
59
+ | 0.025 | 3.29 | 600 | 0.0242 |
60
+ | 0.0148 | 3.84 | 700 | 0.0180 |
61
+ | 0.0098 | 4.39 | 800 | 0.0148 |
62
+ | 0.0077 | 4.94 | 900 | 0.0130 |
63
+ | 0.006 | 5.49 | 1000 | 0.0121 |
64
+ | 0.0053 | 6.04 | 1100 | 0.0115 |
65
+ | 0.0045 | 6.59 | 1200 | 0.0112 |
66
+ | 0.0042 | 7.14 | 1300 | 0.0110 |
67
+ | 0.0039 | 7.69 | 1400 | 0.0109 |
68
+ | 0.0038 | 8.24 | 1500 | 0.0108 |
69
+ | 0.0037 | 8.79 | 1600 | 0.0107 |
70
+ | 0.0037 | 9.33 | 1700 | 0.0107 |
71
+ | 0.0036 | 9.88 | 1800 | 0.0107 |
72
 
73
 
74
  ### Framework versions
75
 
76
+ - Transformers 4.33.1
77
+ - Pytorch 1.12.1
78
+ - Datasets 2.14.6
79
+ - Tokenizers 0.13.3
config.json CHANGED
@@ -19,7 +19,7 @@
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.35.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
 
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.33.1",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
generation_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
- "transformers_version": "4.35.2"
5
  }
 
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
+ "transformers_version": "4.33.1"
5
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:153d131dfeceac8dd97ca99344b36811f898e872944df85e63b1aa8453e176be
3
  size 438124333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd8f113d389f61fde19184ebf65e6dd85c933e950529fa92f416eb7355be49d
3
  size 438124333
special_tokens_map.json CHANGED
@@ -1,13 +1,7 @@
1
  {
2
  "cls_token": "[CLS]",
3
  "mask_token": "[MASK]",
4
- "pad_token": {
5
- "content": "[PAD]",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false
10
- },
11
  "sep_token": "[SEP]",
12
  "unk_token": "[UNK]"
13
  }
 
1
  {
2
  "cls_token": "[CLS]",
3
  "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
 
 
 
 
 
 
5
  "sep_token": "[SEP]",
6
  "unk_token": "[UNK]"
7
  }
tokenizer_config.json CHANGED
@@ -1,46 +1,4 @@
1
  {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "do_lower_case": true,
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
4
  "do_lower_case": true,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1146ba05bdf0c271e8b37a2ab61d281c22a60ddc7dcdfed6827b245935c474a4
3
- size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0d0bb29f293a55e44e7583d04f2a3e382a5ea8209ae54677a2375f081f16a2
3
+ size 3951