jiuhai committed on
Commit
3f13d4e
1 Parent(s): 3c48031

Model save

Browse files
Files changed (5) hide show
  1. README.md +13 -13
  2. all_results.json +14 -14
  3. eval_results.json +11 -11
  4. train_results.json +3 -3
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.0563
19
- - Rewards/chosen: -6.7505
20
- - Rewards/rejected: -10.0735
21
- - Rewards/accuracies: 0.7227
22
- - Rewards/margins: 3.3230
23
- - Logps/rejected: -273.7712
24
- - Logps/chosen: -341.3420
25
- - Logits/rejected: -2.2189
26
- - Logits/chosen: -2.3037
27
 
28
  ## Model description
29
 
@@ -42,7 +42,7 @@ More information needed
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
- - learning_rate: 2e-05
46
  - train_batch_size: 16
47
  - eval_batch_size: 16
48
  - seed: 42
@@ -59,9 +59,9 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
- | 0.0636 | 1.0 | 485 | 0.6042 | -4.0022 | -7.1190 | 0.8086 | 3.1168 | -244.2258 | -313.8589 | -2.3960 | -2.4665 |
63
- | 0.0443 | 2.0 | 970 | 0.7951 | -5.5853 | -8.9194 | 0.7383 | 3.3341 | -262.2304 | -329.6904 | -2.3026 | -2.3851 |
64
- | 0.0238 | 3.0 | 1455 | 1.0563 | -6.7505 | -10.0735 | 0.7227 | 3.3230 | -273.7712 | -341.3420 | -2.2189 | -2.3037 |
65
 
66
 
67
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.3553
19
+ - Rewards/chosen: -0.8622
20
+ - Rewards/rejected: -3.1235
21
+ - Rewards/accuracies: 0.8281
22
+ - Rewards/margins: 2.2613
23
+ - Logps/rejected: -204.2707
24
+ - Logps/chosen: -282.4587
25
+ - Logits/rejected: -2.6699
26
+ - Logits/chosen: -2.7156
27
 
28
  ## Model description
29
 
 
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
+ - learning_rate: 2e-06
46
  - train_batch_size: 16
47
  - eval_batch_size: 16
48
  - seed: 42
 
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
+ | 0.2024 | 1.0 | 485 | 0.4197 | -0.3974 | -1.8930 | 0.8086 | 1.4956 | -191.9660 | -277.8107 | -2.7272 | -2.7680 |
63
+ | 0.1305 | 2.0 | 970 | 0.3694 | -0.7584 | -2.8597 | 0.8242 | 2.1013 | -201.6330 | -281.4208 | -2.6866 | -2.7306 |
64
+ | 0.109 | 3.0 | 1455 | 0.3553 | -0.8622 | -3.1235 | 0.8281 | 2.2613 | -204.2707 | -282.4587 | -2.6699 | -2.7156 |
65
 
66
 
67
  ### Framework versions
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_logits/chosen": -2.3036882877349854,
4
- "eval_logits/rejected": -2.218935012817383,
5
- "eval_logps/chosen": -341.342041015625,
6
- "eval_logps/rejected": -273.77117919921875,
7
- "eval_loss": 1.0562912225723267,
8
- "eval_rewards/accuracies": 0.72265625,
9
- "eval_rewards/chosen": -6.750503063201904,
10
- "eval_rewards/margins": 3.3230087757110596,
11
- "eval_rewards/rejected": -10.07351303100586,
12
- "eval_runtime": 258.1215,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.748,
15
  "eval_steps_per_second": 0.062,
16
- "train_loss": 0.07034083745375122,
17
- "train_runtime": 46831.0549,
18
  "train_samples": 62064,
19
- "train_samples_per_second": 3.976,
20
  "train_steps_per_second": 0.031
21
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_logits/chosen": -2.715555429458618,
4
+ "eval_logits/rejected": -2.6699323654174805,
5
+ "eval_logps/chosen": -282.458740234375,
6
+ "eval_logps/rejected": -204.27066040039062,
7
+ "eval_loss": 0.3553008437156677,
8
+ "eval_rewards/accuracies": 0.828125,
9
+ "eval_rewards/chosen": -0.8621728420257568,
10
+ "eval_rewards/margins": 2.261284112930298,
11
+ "eval_rewards/rejected": -3.123457193374634,
12
+ "eval_runtime": 259.9977,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.692,
15
  "eval_steps_per_second": 0.062,
16
+ "train_loss": 0.21351368668972423,
17
+ "train_runtime": 46913.4477,
18
  "train_samples": 62064,
19
+ "train_samples_per_second": 3.969,
20
  "train_steps_per_second": 0.031
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_logits/chosen": -2.3036882877349854,
4
- "eval_logits/rejected": -2.218935012817383,
5
- "eval_logps/chosen": -341.342041015625,
6
- "eval_logps/rejected": -273.77117919921875,
7
- "eval_loss": 1.0562912225723267,
8
- "eval_rewards/accuracies": 0.72265625,
9
- "eval_rewards/chosen": -6.750503063201904,
10
- "eval_rewards/margins": 3.3230087757110596,
11
- "eval_rewards/rejected": -10.07351303100586,
12
- "eval_runtime": 258.1215,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.748,
15
  "eval_steps_per_second": 0.062
16
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_logits/chosen": -2.715555429458618,
4
+ "eval_logits/rejected": -2.6699323654174805,
5
+ "eval_logps/chosen": -282.458740234375,
6
+ "eval_logps/rejected": -204.27066040039062,
7
+ "eval_loss": 0.3553008437156677,
8
+ "eval_rewards/accuracies": 0.828125,
9
+ "eval_rewards/chosen": -0.8621728420257568,
10
+ "eval_rewards/margins": 2.261284112930298,
11
+ "eval_rewards/rejected": -3.123457193374634,
12
+ "eval_runtime": 259.9977,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.692,
15
  "eval_steps_per_second": 0.062
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.07034083745375122,
4
- "train_runtime": 46831.0549,
5
  "train_samples": 62064,
6
- "train_samples_per_second": 3.976,
7
  "train_steps_per_second": 0.031
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.21351368668972423,
4
+ "train_runtime": 46913.4477,
5
  "train_samples": 62064,
6
+ "train_samples_per_second": 3.969,
7
  "train_steps_per_second": 0.031
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff