c-alfano committed
Commit 44810c5 (1 parent: f941876)

Model save
README.md CHANGED
@@ -3,15 +3,10 @@ library_name: transformers
  license: gemma
  base_model: google/gemma-7b
  tags:
- - alignment-handbook
- - trl
- - orpo
- - generated_from_trainer
  - trl
  - orpo
+ - alignment-handbook
  - generated_from_trainer
- datasets:
- - silviasapora/low_quality_dpo7k
  model-index:
  - name: gemma-7b-borpo-low-quality-v4
    results: []
@@ -22,20 +17,20 @@ should probably proofread and complete it, then remove this comment. -->

  # gemma-7b-borpo-low-quality-v4

- This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the silviasapora/low_quality_dpo7k dataset.
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.9080
- - Rewards/chosen: -0.6019
- - Rewards/rejected: -0.7507
- - Rewards/accuracies: 0.6259
- - Rewards/margins: 0.1488
- - Logps/rejected: -1.5015
- - Logps/chosen: -1.2038
- - Logits/rejected: 247.6263
- - Logits/chosen: 282.0085
- - Nll Loss: 1.5545
- - Log Odds Ratio: -0.6477
- - Log Odds Chosen: 0.4230
+ - Loss: 1.8577
+ - Rewards/chosen: -0.5993
+ - Rewards/rejected: -0.7602
+ - Rewards/accuracies: 0.6143
+ - Rewards/margins: 0.1610
+ - Logps/rejected: -1.5205
+ - Logps/chosen: -1.1986
+ - Logits/rejected: 240.3907
+ - Logits/chosen: 301.1215
+ - Nll Loss: 1.5532
+ - Log Odds Ratio: -0.6421
+ - Log Odds Chosen: 0.4396

  ## Model description

@@ -59,10 +54,10 @@ The following hyperparameters were used during training:
  - eval_batch_size: 1
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 4
+ - num_devices: 8
  - gradient_accumulation_steps: 4
- - total_train_batch_size: 32
- - total_eval_batch_size: 4
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: inverse_sqrt
  - lr_scheduler_warmup_steps: 100
@@ -70,11 +65,11 @@ The following hyperparameters were used during training:

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
- |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
- | 1.8238 | 0.9955 | 167 | 1.8176 | -0.5378 | -0.6319 | 0.5468 | 0.0941 | -1.2637 | -1.0755 | 293.2744 | 322.6454 | 1.4783 | -0.6631 | 0.2616 |
- | 1.3092 | 1.9970 | 335 | 1.7560 | -0.5202 | -0.6309 | 0.5324 | 0.1106 | -1.2617 | -1.0405 | 279.0659 | 309.3900 | 1.4054 | -0.6637 | 0.3224 |
- | 0.6827 | 2.9866 | 501 | 1.9080 | -0.6019 | -0.7507 | 0.6259 | 0.1488 | -1.5015 | -1.2038 | 247.6263 | 282.0085 | 1.5545 | -0.6477 | 0.4230 |
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
+ | 1.8227 | 1.0 | 84 | 1.9616 | -0.6050 | -0.6743 | 0.5 | 0.0693 | -1.3486 | -1.2099 | 257.8447 | 315.1940 | 1.6719 | -0.6903 | 0.1646 |
+ | 1.4803 | 2.0 | 168 | 1.7681 | -0.5462 | -0.6508 | 0.5286 | 0.1046 | -1.3017 | -1.0924 | 274.3526 | 328.0207 | 1.4854 | -0.6718 | 0.2561 |
+ | 0.9109 | 3.0 | 252 | 1.8577 | -0.5993 | -0.7602 | 0.6143 | 0.1610 | -1.5205 | -1.1986 | 240.3907 | 301.1215 | 1.5532 | -0.6421 | 0.4396 |


  ### Framework versions
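The evaluation list and results table above mix several ORPO-specific quantities. Below is a minimal sketch of how they relate, assuming TRL's `ORPOTrainer` conventions; the `beta = 0.5` value is an inference from `Rewards/chosen` being exactly half of `Logps/chosen` in this card, not a setting read from the commit.

```python
import torch
import torch.nn.functional as F

def orpo_terms(chosen_logps, rejected_logps, nll_loss, beta=0.5):
    """Sketch of the ORPO odds-ratio terms from length-normalised sequence log-probs."""
    # log odds(p) = log p - log(1 - p); "Log Odds Chosen" is the chosen-minus-rejected difference.
    log_odds = (chosen_logps - rejected_logps) - (
        torch.log1p(-torch.exp(chosen_logps)) - torch.log1p(-torch.exp(rejected_logps))
    )
    log_odds_ratio = F.logsigmoid(log_odds)           # reported as "Log Odds Ratio"
    loss = nll_loss - beta * log_odds_ratio.mean()    # NLL term plus the odds-ratio penalty
    rewards_chosen = beta * chosen_logps              # reported as "Rewards/chosen"
    rewards_rejected = beta * rejected_logps          # reported as "Rewards/rejected"
    return loss, log_odds.mean(), rewards_chosen.mean(), rewards_rejected.mean()

# Feeding in the final eval averages (Logps/chosen = -1.1986, Logps/rejected = -1.5205,
# Nll Loss = 1.5532) lands close to, but not exactly on, the reported aggregates,
# because the card averages these nonlinear quantities over individual examples.
print(orpo_terms(torch.tensor([-1.1986]), torch.tensor([-1.5205]), torch.tensor(1.5532)))
```

Under the same reading, `Rewards/margins` is simply `Rewards/chosen` minus `Rewards/rejected`: -0.5993 - (-0.7602) ≈ 0.1610, which matches the value reported above.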
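The hyperparameter hunk changes the device count and the derived totals, but the card never states `per_device_train_batch_size`. Both sides of the diff are consistent with a per-device train batch size of 2, which the check below treats as an assumption rather than a fact from the repo:

```python
# per_device_train = 2 is an assumption, not stated in the card; eval_batch_size = 1 is stated.
per_device_train, per_device_eval, grad_accum = 2, 1, 4

old_devices, new_devices = 4, 8
assert per_device_train * old_devices * grad_accum == 32   # old total_train_batch_size
assert per_device_train * new_devices * grad_accum == 64   # new total_train_batch_size
assert per_device_eval * old_devices == 4                  # old total_eval_batch_size
assert per_device_eval * new_devices == 8                  # new total_eval_batch_size
```

Doubling the effective batch size also roughly halves the optimiser steps per epoch, which lines up with the results table now ending at step 252 instead of 501.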
all_results.json CHANGED
@@ -1,5 +1,5 @@
  {
- "epoch": 2.9865871833084947,
+ "epoch": 3.0,
  "eval_log_odds_chosen": 0.42297032475471497,
  "eval_log_odds_ratio": -0.6477048993110657,
  "eval_logits/chosen": 282.00848388671875,
@@ -17,9 +17,9 @@
  "eval_samples_per_second": 1.571,
  "eval_steps_per_second": 0.395,
  "total_flos": 0.0,
- "train_loss": 1.686337627812536,
- "train_runtime": 31593.308,
+ "train_loss": 2.2429193542117165,
+ "train_runtime": 13126.4564,
  "train_samples": 5364,
- "train_samples_per_second": 0.509,
- "train_steps_per_second": 0.016
+ "train_samples_per_second": 1.226,
+ "train_steps_per_second": 0.019
  }
config.json CHANGED
@@ -24,6 +24,6 @@
  "rope_theta": 10000.0,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.44.2",
- "use_cache": true,
+ "use_cache": false,
  "vocab_size": 256000
  }
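The only `config.json` change is `use_cache` flipping to `false`, something trainers commonly do while gradient checkpointing is active. It does not have to stay disabled for inference; below is a hedged sketch of overriding it at load time, where the repo id is a placeholder rather than a path taken from this commit:

```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "your-org/gemma-7b-borpo-low-quality-v4",  # placeholder repo id, not taken from this commit
    torch_dtype=torch.bfloat16,                # matches the torch_dtype recorded in config.json
    use_cache=True,                            # re-enable the KV cache for faster generation
)
```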
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7014c73861af23b57167bd5b78fd5da80aaf28da346b1537da15fcd99f35be90
+ oid sha256:f443dda04572c417a42cfdd54346f42ce60e90ff3d5ae05f7e731aed5844fc75
  size 4995496656
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:474e7b757ab8f3d73b03132238cd45157a3836ee6c115882cc07a944d0ccad63
+ oid sha256:9a29d30c5f49faadf68cd6bc7571db8587854429f86910876e5c1909b40f940d
  size 4982953168
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d40c9daedb4f0dcb282736b5c1db225d948b8ec6e5c6def0c4a7c506cff1583b
+ oid sha256:ae31c784bc60ac996a2bef0fadb0668a9347e9923fa627628f1bd6b0beb95d49
  size 4982953200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7669d74a0d322dfc1fb3a8e2c6f0b2bc1bbe19141386e928f00901baf950b419
+ oid sha256:fcd7d1d840a767f2761f1873181edc7f1bcbf0fcff3ec307719affbe2e60b69e
  size 2113988336
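All four weight shards change only the sha256 oid inside their Git LFS pointers; the byte sizes are identical, so this is new tensor content in the same sharding layout. A minimal sketch for checking a locally downloaded shard against its pointer (the local filename is assumed to match the repo layout):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-gigabyte shards never need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for the first shard, copied from the pointer diff above.
expected = "f443dda04572c417a42cfdd54346f42ce60e90ff3d5ae05f7e731aed5844fc75"
assert sha256_of("model-00001-of-00004.safetensors") == expected
```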
runs/Sep23_17-22-00_zizgpu06.cpu.stats.ox.ac.uk/events.out.tfevents.1727108622.zizgpu06.cpu.stats.ox.ac.uk.54426.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4158871cf5f1d411a43dcb4533a2a518467c8a3f9e5b5e332c276c2e9cdc7560
+ size 51124
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 2.9865871833084947,
+ "epoch": 3.0,
  "total_flos": 0.0,
- "train_loss": 1.686337627812536,
- "train_runtime": 31593.308,
+ "train_loss": 2.2429193542117165,
+ "train_runtime": 13126.4564,
  "train_samples": 5364,
- "train_samples_per_second": 0.509,
- "train_steps_per_second": 0.016
+ "train_samples_per_second": 1.226,
+ "train_steps_per_second": 0.019
  }
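The new throughput figures in `train_results.json` follow directly from the sample count, the three epochs, the runtime, and the 252 optimiser steps reported in the results table; a quick arithmetic check:

```python
train_samples, epochs, runtime_s, total_steps = 5364, 3, 13126.4564, 252

samples_per_second = train_samples * epochs / runtime_s   # ≈ 1.226, as recorded
steps_per_second = total_steps / runtime_s                # ≈ 0.019, as recorded
print(round(samples_per_second, 3), round(steps_per_second, 3))
```

The runtime drop from 31593 s to 13126 s is broadly in line with moving from 4 to 8 devices.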
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be1d8a27d48554cddc963c8d0b2118d650c144ce868f0fed3e7f108c0b781389
+ oid sha256:5d21b714b06ef407acc522612db50939c448e0c9200b396aa7f78fd733fe2884
  size 6776
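`training_args.bin` is a pickled `TrainingArguments`-style object, so settings the card omits (including the per-device batch size assumed earlier) can be read back from it. A sketch, noting that unpickling requires trusting the file and having the training libraries installed:

```python
import torch

# weights_only=False is needed because this file is a pickled arguments object, not tensors;
# only load pickles from sources you trust.
args = torch.load("training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.gradient_accumulation_steps, args.learning_rate)
```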