Training in progress, step 1500, checkpoint
Browse files- last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2252 -2
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:380c5b3d4a50b2c96e9e9cbb9c39c7ba002cf4734c56dd71ec52a7009ca7d7ab
|
3 |
+
size 7843036668
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09bb2b8e1858310c41a7d6c72ec5c75bdf4dd7060cf6f257dbb00cf1a0a9b1fc
|
3 |
+
size 7843043580
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bdee7d2a297dca2ab84aca341f8f877f2d914a24734386b5ff6b922a3a6f385
|
3 |
+
size 7843043004
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c91214c04de8b5db131ad5404dcba6e217f724f41d64fb874167ee3d31d9475d
|
3 |
+
size 7843043388
|
last-checkpoint/global_step1500/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd4d869fa72945b74786f1315130651eeacb9591d23fe9c00187e45a556fc278
|
3 |
+
size 5228775200
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1500
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4988030368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:617221b5a3979bb5c195e80814b940ad0fd5e4ea46fcf53c004738f7521b9b05
|
3 |
size 4988030368
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1420344488
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53b46ffbe9750f068af7cff31ad24813da6ce5bbc66f559f4dcbf3d434d5e8f7
|
3 |
size 1420344488
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb1f8e086c96cde9498cc8372841552a3b3d37b7449d73d2153f92624f5efc96
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 999999,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9007,6 +9007,2256 @@
|
|
9007 |
"rewards/margins": 3.2304723262786865,
|
9008 |
"rewards/rejected": -3.3748857975006104,
|
9009 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9010 |
}
|
9011 |
],
|
9012 |
"logging_steps": 2,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.8663970759779143,
|
5 |
"eval_steps": 999999,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9007 |
"rewards/margins": 3.2304723262786865,
|
9008 |
"rewards/rejected": -3.3748857975006104,
|
9009 |
"step": 1200
|
9010 |
+
},
|
9011 |
+
{
|
9012 |
+
"epoch": 1.4956061902169686,
|
9013 |
+
"grad_norm": 30.88616371154785,
|
9014 |
+
"learning_rate": 3.345983350831798e-08,
|
9015 |
+
"logits/chosen": -10.752206802368164,
|
9016 |
+
"logits/rejected": -10.766036987304688,
|
9017 |
+
"logps/chosen": -19.481313705444336,
|
9018 |
+
"logps/rejected": -54.68231201171875,
|
9019 |
+
"loss": 0.3279,
|
9020 |
+
"rewards/accuracies": 0.65625,
|
9021 |
+
"rewards/chosen": 0.06634411960840225,
|
9022 |
+
"rewards/margins": 3.3881375789642334,
|
9023 |
+
"rewards/rejected": -3.321793556213379,
|
9024 |
+
"step": 1202
|
9025 |
+
},
|
9026 |
+
{
|
9027 |
+
"epoch": 1.498094719651606,
|
9028 |
+
"grad_norm": 4.875926971435547,
|
9029 |
+
"learning_rate": 3.3148972168516734e-08,
|
9030 |
+
"logits/chosen": -10.793999671936035,
|
9031 |
+
"logits/rejected": -10.790190696716309,
|
9032 |
+
"logps/chosen": -15.71826457977295,
|
9033 |
+
"logps/rejected": -53.232261657714844,
|
9034 |
+
"loss": 0.3231,
|
9035 |
+
"rewards/accuracies": 0.59375,
|
9036 |
+
"rewards/chosen": 0.3771316409111023,
|
9037 |
+
"rewards/margins": 3.606487512588501,
|
9038 |
+
"rewards/rejected": -3.229356050491333,
|
9039 |
+
"step": 1204
|
9040 |
+
},
|
9041 |
+
{
|
9042 |
+
"epoch": 1.500583249086243,
|
9043 |
+
"grad_norm": 1.0279144048690796,
|
9044 |
+
"learning_rate": 3.2839274464991854e-08,
|
9045 |
+
"logits/chosen": -10.80120849609375,
|
9046 |
+
"logits/rejected": -10.804399490356445,
|
9047 |
+
"logps/chosen": -19.204309463500977,
|
9048 |
+
"logps/rejected": -54.1872673034668,
|
9049 |
+
"loss": 0.387,
|
9050 |
+
"rewards/accuracies": 0.65625,
|
9051 |
+
"rewards/chosen": 0.022899247705936432,
|
9052 |
+
"rewards/margins": 3.3754444122314453,
|
9053 |
+
"rewards/rejected": -3.3525447845458984,
|
9054 |
+
"step": 1206
|
9055 |
+
},
|
9056 |
+
{
|
9057 |
+
"epoch": 1.5030717785208805,
|
9058 |
+
"grad_norm": 9.367308616638184,
|
9059 |
+
"learning_rate": 3.253074578846805e-08,
|
9060 |
+
"logits/chosen": -10.806297302246094,
|
9061 |
+
"logits/rejected": -10.80675983428955,
|
9062 |
+
"logps/chosen": -20.797101974487305,
|
9063 |
+
"logps/rejected": -50.487945556640625,
|
9064 |
+
"loss": 0.3453,
|
9065 |
+
"rewards/accuracies": 0.6875,
|
9066 |
+
"rewards/chosen": -0.15202410519123077,
|
9067 |
+
"rewards/margins": 2.7633230686187744,
|
9068 |
+
"rewards/rejected": -2.915347099304199,
|
9069 |
+
"step": 1208
|
9070 |
+
},
|
9071 |
+
{
|
9072 |
+
"epoch": 1.5055603079555175,
|
9073 |
+
"grad_norm": 42.94970703125,
|
9074 |
+
"learning_rate": 3.222339150932133e-08,
|
9075 |
+
"logits/chosen": -10.7854585647583,
|
9076 |
+
"logits/rejected": -10.784134864807129,
|
9077 |
+
"logps/chosen": -19.761821746826172,
|
9078 |
+
"logps/rejected": -61.553802490234375,
|
9079 |
+
"loss": 0.3576,
|
9080 |
+
"rewards/accuracies": 0.71875,
|
9081 |
+
"rewards/chosen": 0.026075001806020737,
|
9082 |
+
"rewards/margins": 4.108028888702393,
|
9083 |
+
"rewards/rejected": -4.081954002380371,
|
9084 |
+
"step": 1210
|
9085 |
+
},
|
9086 |
+
{
|
9087 |
+
"epoch": 1.5080488373901546,
|
9088 |
+
"grad_norm": 33.7689323425293,
|
9089 |
+
"learning_rate": 3.191721697748576e-08,
|
9090 |
+
"logits/chosen": -10.799263954162598,
|
9091 |
+
"logits/rejected": -10.799744606018066,
|
9092 |
+
"logps/chosen": -23.306804656982422,
|
9093 |
+
"logps/rejected": -64.52201843261719,
|
9094 |
+
"loss": 0.2794,
|
9095 |
+
"rewards/accuracies": 0.75,
|
9096 |
+
"rewards/chosen": -0.3046596050262451,
|
9097 |
+
"rewards/margins": 4.032251834869385,
|
9098 |
+
"rewards/rejected": -4.336911201477051,
|
9099 |
+
"step": 1212
|
9100 |
+
},
|
9101 |
+
{
|
9102 |
+
"epoch": 1.5105373668247921,
|
9103 |
+
"grad_norm": 6.178465366363525,
|
9104 |
+
"learning_rate": 3.161222752236024e-08,
|
9105 |
+
"logits/chosen": -10.777413368225098,
|
9106 |
+
"logits/rejected": -10.791770935058594,
|
9107 |
+
"logps/chosen": -24.492530822753906,
|
9108 |
+
"logps/rejected": -61.961761474609375,
|
9109 |
+
"loss": 0.3106,
|
9110 |
+
"rewards/accuracies": 0.71875,
|
9111 |
+
"rewards/chosen": -0.4530527591705322,
|
9112 |
+
"rewards/margins": 3.6912074089050293,
|
9113 |
+
"rewards/rejected": -4.144260406494141,
|
9114 |
+
"step": 1214
|
9115 |
+
},
|
9116 |
+
{
|
9117 |
+
"epoch": 1.5130258962594292,
|
9118 |
+
"grad_norm": 11.556970596313477,
|
9119 |
+
"learning_rate": 3.130842845271564e-08,
|
9120 |
+
"logits/chosen": -10.765844345092773,
|
9121 |
+
"logits/rejected": -10.766853332519531,
|
9122 |
+
"logps/chosen": -18.27448272705078,
|
9123 |
+
"logps/rejected": -67.04450225830078,
|
9124 |
+
"loss": 0.3224,
|
9125 |
+
"rewards/accuracies": 0.8125,
|
9126 |
+
"rewards/chosen": 0.13868612051010132,
|
9127 |
+
"rewards/margins": 4.729941368103027,
|
9128 |
+
"rewards/rejected": -4.591255187988281,
|
9129 |
+
"step": 1216
|
9130 |
+
},
|
9131 |
+
{
|
9132 |
+
"epoch": 1.5155144256940662,
|
9133 |
+
"grad_norm": 5.874917984008789,
|
9134 |
+
"learning_rate": 3.100582505660263e-08,
|
9135 |
+
"logits/chosen": -10.817387580871582,
|
9136 |
+
"logits/rejected": -10.818646430969238,
|
9137 |
+
"logps/chosen": -22.664018630981445,
|
9138 |
+
"logps/rejected": -61.78883361816406,
|
9139 |
+
"loss": 0.3315,
|
9140 |
+
"rewards/accuracies": 0.71875,
|
9141 |
+
"rewards/chosen": -0.26951882243156433,
|
9142 |
+
"rewards/margins": 3.7975308895111084,
|
9143 |
+
"rewards/rejected": -4.067049980163574,
|
9144 |
+
"step": 1218
|
9145 |
+
},
|
9146 |
+
{
|
9147 |
+
"epoch": 1.5180029551287038,
|
9148 |
+
"grad_norm": 7.716071128845215,
|
9149 |
+
"learning_rate": 3.0704422601259386e-08,
|
9150 |
+
"logits/chosen": -10.804115295410156,
|
9151 |
+
"logits/rejected": -10.803618431091309,
|
9152 |
+
"logps/chosen": -20.043956756591797,
|
9153 |
+
"logps/rejected": -59.39397430419922,
|
9154 |
+
"loss": 0.3588,
|
9155 |
+
"rewards/accuracies": 0.71875,
|
9156 |
+
"rewards/chosen": -0.025287901982665062,
|
9157 |
+
"rewards/margins": 3.833770990371704,
|
9158 |
+
"rewards/rejected": -3.8590588569641113,
|
9159 |
+
"step": 1220
|
9160 |
+
},
|
9161 |
+
{
|
9162 |
+
"epoch": 1.5204914845633408,
|
9163 |
+
"grad_norm": 5.23994779586792,
|
9164 |
+
"learning_rate": 3.0404226333020114e-08,
|
9165 |
+
"logits/chosen": -10.771346092224121,
|
9166 |
+
"logits/rejected": -10.723176002502441,
|
9167 |
+
"logps/chosen": -21.367895126342773,
|
9168 |
+
"logps/rejected": -51.024864196777344,
|
9169 |
+
"loss": 0.2626,
|
9170 |
+
"rewards/accuracies": 0.59375,
|
9171 |
+
"rewards/chosen": -0.20692677795886993,
|
9172 |
+
"rewards/margins": 2.8495285511016846,
|
9173 |
+
"rewards/rejected": -3.056455135345459,
|
9174 |
+
"step": 1222
|
9175 |
+
},
|
9176 |
+
{
|
9177 |
+
"epoch": 1.522980013997978,
|
9178 |
+
"grad_norm": 17.1359920501709,
|
9179 |
+
"learning_rate": 3.010524147722353e-08,
|
9180 |
+
"logits/chosen": -10.765385627746582,
|
9181 |
+
"logits/rejected": -10.76011848449707,
|
9182 |
+
"logps/chosen": -25.971342086791992,
|
9183 |
+
"logps/rejected": -48.49481201171875,
|
9184 |
+
"loss": 0.3097,
|
9185 |
+
"rewards/accuracies": 0.625,
|
9186 |
+
"rewards/chosen": -0.6388694047927856,
|
9187 |
+
"rewards/margins": 2.1466176509857178,
|
9188 |
+
"rewards/rejected": -2.785486936569214,
|
9189 |
+
"step": 1224
|
9190 |
+
},
|
9191 |
+
{
|
9192 |
+
"epoch": 1.5254685434326154,
|
9193 |
+
"grad_norm": 5.530016899108887,
|
9194 |
+
"learning_rate": 2.9807473238122096e-08,
|
9195 |
+
"logits/chosen": -10.834047317504883,
|
9196 |
+
"logits/rejected": -10.830108642578125,
|
9197 |
+
"logps/chosen": -23.04602813720703,
|
9198 |
+
"logps/rejected": -76.07076263427734,
|
9199 |
+
"loss": 0.2867,
|
9200 |
+
"rewards/accuracies": 0.84375,
|
9201 |
+
"rewards/chosen": -0.3454991281032562,
|
9202 |
+
"rewards/margins": 5.049150466918945,
|
9203 |
+
"rewards/rejected": -5.394649028778076,
|
9204 |
+
"step": 1226
|
9205 |
+
},
|
9206 |
+
{
|
9207 |
+
"epoch": 1.5279570728672525,
|
9208 |
+
"grad_norm": 7.995519161224365,
|
9209 |
+
"learning_rate": 2.951092679879136e-08,
|
9210 |
+
"logits/chosen": -10.817156791687012,
|
9211 |
+
"logits/rejected": -10.819764137268066,
|
9212 |
+
"logps/chosen": -26.958938598632812,
|
9213 |
+
"logps/rejected": -63.7525634765625,
|
9214 |
+
"loss": 0.3123,
|
9215 |
+
"rewards/accuracies": 0.78125,
|
9216 |
+
"rewards/chosen": -0.7338520288467407,
|
9217 |
+
"rewards/margins": 3.5241026878356934,
|
9218 |
+
"rewards/rejected": -4.2579545974731445,
|
9219 |
+
"step": 1228
|
9220 |
+
},
|
9221 |
+
{
|
9222 |
+
"epoch": 1.5304456023018898,
|
9223 |
+
"grad_norm": 2.934828519821167,
|
9224 |
+
"learning_rate": 2.9215607321039604e-08,
|
9225 |
+
"logits/chosen": -10.804816246032715,
|
9226 |
+
"logits/rejected": -10.802118301391602,
|
9227 |
+
"logps/chosen": -30.615299224853516,
|
9228 |
+
"logps/rejected": -78.92305755615234,
|
9229 |
+
"loss": 0.2861,
|
9230 |
+
"rewards/accuracies": 0.78125,
|
9231 |
+
"rewards/chosen": -1.078827142715454,
|
9232 |
+
"rewards/margins": 4.707486629486084,
|
9233 |
+
"rewards/rejected": -5.786314010620117,
|
9234 |
+
"step": 1230
|
9235 |
+
},
|
9236 |
+
{
|
9237 |
+
"epoch": 1.532934131736527,
|
9238 |
+
"grad_norm": 10.17944622039795,
|
9239 |
+
"learning_rate": 2.8921519945318274e-08,
|
9240 |
+
"logits/chosen": -10.815936088562012,
|
9241 |
+
"logits/rejected": -10.81978702545166,
|
9242 |
+
"logps/chosen": -22.286001205444336,
|
9243 |
+
"logps/rejected": -50.4647331237793,
|
9244 |
+
"loss": 0.3433,
|
9245 |
+
"rewards/accuracies": 0.65625,
|
9246 |
+
"rewards/chosen": -0.28749099373817444,
|
9247 |
+
"rewards/margins": 2.67037296295166,
|
9248 |
+
"rewards/rejected": -2.9578638076782227,
|
9249 |
+
"step": 1232
|
9250 |
+
},
|
9251 |
+
{
|
9252 |
+
"epoch": 1.535422661171164,
|
9253 |
+
"grad_norm": 22.162673950195312,
|
9254 |
+
"learning_rate": 2.8628669790632188e-08,
|
9255 |
+
"logits/chosen": -10.832916259765625,
|
9256 |
+
"logits/rejected": -10.841936111450195,
|
9257 |
+
"logps/chosen": -24.979412078857422,
|
9258 |
+
"logps/rejected": -69.00961303710938,
|
9259 |
+
"loss": 0.2422,
|
9260 |
+
"rewards/accuracies": 0.71875,
|
9261 |
+
"rewards/chosen": -0.5436473488807678,
|
9262 |
+
"rewards/margins": 4.181175231933594,
|
9263 |
+
"rewards/rejected": -4.724822521209717,
|
9264 |
+
"step": 1234
|
9265 |
+
},
|
9266 |
+
{
|
9267 |
+
"epoch": 1.5379111906058014,
|
9268 |
+
"grad_norm": 10.095711708068848,
|
9269 |
+
"learning_rate": 2.8337061954450748e-08,
|
9270 |
+
"logits/chosen": -10.793304443359375,
|
9271 |
+
"logits/rejected": -10.794236183166504,
|
9272 |
+
"logps/chosen": -24.665653228759766,
|
9273 |
+
"logps/rejected": -65.7728271484375,
|
9274 |
+
"loss": 0.3002,
|
9275 |
+
"rewards/accuracies": 0.625,
|
9276 |
+
"rewards/chosen": -0.5008203387260437,
|
9277 |
+
"rewards/margins": 3.981152057647705,
|
9278 |
+
"rewards/rejected": -4.481971740722656,
|
9279 |
+
"step": 1236
|
9280 |
+
},
|
9281 |
+
{
|
9282 |
+
"epoch": 1.5403997200404387,
|
9283 |
+
"grad_norm": 50.88451385498047,
|
9284 |
+
"learning_rate": 2.804670151261891e-08,
|
9285 |
+
"logits/chosen": -10.825129508972168,
|
9286 |
+
"logits/rejected": -10.825586318969727,
|
9287 |
+
"logps/chosen": -28.969776153564453,
|
9288 |
+
"logps/rejected": -54.09687042236328,
|
9289 |
+
"loss": 0.3774,
|
9290 |
+
"rewards/accuracies": 0.65625,
|
9291 |
+
"rewards/chosen": -0.9328697323799133,
|
9292 |
+
"rewards/margins": 2.505774974822998,
|
9293 |
+
"rewards/rejected": -3.4386448860168457,
|
9294 |
+
"step": 1238
|
9295 |
+
},
|
9296 |
+
{
|
9297 |
+
"epoch": 1.5428882494750757,
|
9298 |
+
"grad_norm": 10.008930206298828,
|
9299 |
+
"learning_rate": 2.7757593519269084e-08,
|
9300 |
+
"logits/chosen": -10.8192138671875,
|
9301 |
+
"logits/rejected": -10.820106506347656,
|
9302 |
+
"logps/chosen": -24.02053451538086,
|
9303 |
+
"logps/rejected": -57.27711868286133,
|
9304 |
+
"loss": 0.2994,
|
9305 |
+
"rewards/accuracies": 0.71875,
|
9306 |
+
"rewards/chosen": -0.46384507417678833,
|
9307 |
+
"rewards/margins": 3.1840813159942627,
|
9308 |
+
"rewards/rejected": -3.6479263305664062,
|
9309 |
+
"step": 1240
|
9310 |
+
},
|
9311 |
+
{
|
9312 |
+
"epoch": 1.545376778909713,
|
9313 |
+
"grad_norm": 8.246393203735352,
|
9314 |
+
"learning_rate": 2.746974300673296e-08,
|
9315 |
+
"logits/chosen": -10.833887100219727,
|
9316 |
+
"logits/rejected": -10.839536666870117,
|
9317 |
+
"logps/chosen": -34.37361145019531,
|
9318 |
+
"logps/rejected": -72.40042114257812,
|
9319 |
+
"loss": 0.3491,
|
9320 |
+
"rewards/accuracies": 0.59375,
|
9321 |
+
"rewards/chosen": -1.4933593273162842,
|
9322 |
+
"rewards/margins": 3.673614501953125,
|
9323 |
+
"rewards/rejected": -5.16697359085083,
|
9324 |
+
"step": 1242
|
9325 |
+
},
|
9326 |
+
{
|
9327 |
+
"epoch": 1.5478653083443503,
|
9328 |
+
"grad_norm": 9.959875106811523,
|
9329 |
+
"learning_rate": 2.718315498545407e-08,
|
9330 |
+
"logits/chosen": -10.775245666503906,
|
9331 |
+
"logits/rejected": -10.781604766845703,
|
9332 |
+
"logps/chosen": -27.01799201965332,
|
9333 |
+
"logps/rejected": -58.5137825012207,
|
9334 |
+
"loss": 0.3782,
|
9335 |
+
"rewards/accuracies": 0.71875,
|
9336 |
+
"rewards/chosen": -0.739520788192749,
|
9337 |
+
"rewards/margins": 3.0444231033325195,
|
9338 |
+
"rewards/rejected": -3.7839434146881104,
|
9339 |
+
"step": 1244
|
9340 |
+
},
|
9341 |
+
{
|
9342 |
+
"epoch": 1.5503538377789874,
|
9343 |
+
"grad_norm": 6.7338480949401855,
|
9344 |
+
"learning_rate": 2.6897834443900524e-08,
|
9345 |
+
"logits/chosen": -10.767149925231934,
|
9346 |
+
"logits/rejected": -10.770130157470703,
|
9347 |
+
"logps/chosen": -27.426719665527344,
|
9348 |
+
"logps/rejected": -58.368736267089844,
|
9349 |
+
"loss": 0.3511,
|
9350 |
+
"rewards/accuracies": 0.625,
|
9351 |
+
"rewards/chosen": -0.7709727883338928,
|
9352 |
+
"rewards/margins": 3.010869264602661,
|
9353 |
+
"rewards/rejected": -3.7818422317504883,
|
9354 |
+
"step": 1246
|
9355 |
+
},
|
9356 |
+
{
|
9357 |
+
"epoch": 1.5528423672136247,
|
9358 |
+
"grad_norm": 4.529770374298096,
|
9359 |
+
"learning_rate": 2.661378634847805e-08,
|
9360 |
+
"logits/chosen": -10.790437698364258,
|
9361 |
+
"logits/rejected": -10.792939186096191,
|
9362 |
+
"logps/chosen": -28.72300910949707,
|
9363 |
+
"logps/rejected": -63.083126068115234,
|
9364 |
+
"loss": 0.2706,
|
9365 |
+
"rewards/accuracies": 0.625,
|
9366 |
+
"rewards/chosen": -0.8835013508796692,
|
9367 |
+
"rewards/margins": 3.3935039043426514,
|
9368 |
+
"rewards/rejected": -4.277005195617676,
|
9369 |
+
"step": 1248
|
9370 |
+
},
|
9371 |
+
{
|
9372 |
+
"epoch": 1.555330896648262,
|
9373 |
+
"grad_norm": 4.372297763824463,
|
9374 |
+
"learning_rate": 2.633101564344381e-08,
|
9375 |
+
"logits/chosen": -10.801713943481445,
|
9376 |
+
"logits/rejected": -10.806915283203125,
|
9377 |
+
"logps/chosen": -35.876853942871094,
|
9378 |
+
"logps/rejected": -73.22596740722656,
|
9379 |
+
"loss": 0.3021,
|
9380 |
+
"rewards/accuracies": 0.75,
|
9381 |
+
"rewards/chosen": -1.6220556497573853,
|
9382 |
+
"rewards/margins": 3.583003044128418,
|
9383 |
+
"rewards/rejected": -5.205059051513672,
|
9384 |
+
"step": 1250
|
9385 |
+
},
|
9386 |
+
{
|
9387 |
+
"epoch": 1.557819426082899,
|
9388 |
+
"grad_norm": 78.30384063720703,
|
9389 |
+
"learning_rate": 2.6049527250820048e-08,
|
9390 |
+
"logits/chosen": -10.82096004486084,
|
9391 |
+
"logits/rejected": -10.825112342834473,
|
9392 |
+
"logps/chosen": -24.951976776123047,
|
9393 |
+
"logps/rejected": -56.246063232421875,
|
9394 |
+
"loss": 0.2652,
|
9395 |
+
"rewards/accuracies": 0.625,
|
9396 |
+
"rewards/chosen": -0.5131027698516846,
|
9397 |
+
"rewards/margins": 3.009666919708252,
|
9398 |
+
"rewards/rejected": -3.5227696895599365,
|
9399 |
+
"step": 1252
|
9400 |
+
},
|
9401 |
+
{
|
9402 |
+
"epoch": 1.5603079555175363,
|
9403 |
+
"grad_norm": 26.9722900390625,
|
9404 |
+
"learning_rate": 2.5769326070308673e-08,
|
9405 |
+
"logits/chosen": -10.822922706604004,
|
9406 |
+
"logits/rejected": -10.821035385131836,
|
9407 |
+
"logps/chosen": -29.673250198364258,
|
9408 |
+
"logps/rejected": -67.98855590820312,
|
9409 |
+
"loss": 0.3846,
|
9410 |
+
"rewards/accuracies": 0.71875,
|
9411 |
+
"rewards/chosen": -1.0159591436386108,
|
9412 |
+
"rewards/margins": 3.564460039138794,
|
9413 |
+
"rewards/rejected": -4.580419063568115,
|
9414 |
+
"step": 1254
|
9415 |
+
},
|
9416 |
+
{
|
9417 |
+
"epoch": 1.5627964849521736,
|
9418 |
+
"grad_norm": 7.283795356750488,
|
9419 |
+
"learning_rate": 2.5490416979205754e-08,
|
9420 |
+
"logits/chosen": -10.802266120910645,
|
9421 |
+
"logits/rejected": -10.805842399597168,
|
9422 |
+
"logps/chosen": -24.364179611206055,
|
9423 |
+
"logps/rejected": -71.14039611816406,
|
9424 |
+
"loss": 0.3523,
|
9425 |
+
"rewards/accuracies": 0.71875,
|
9426 |
+
"rewards/chosen": -0.4461979866027832,
|
9427 |
+
"rewards/margins": 4.550894737243652,
|
9428 |
+
"rewards/rejected": -4.9970927238464355,
|
9429 |
+
"step": 1256
|
9430 |
+
},
|
9431 |
+
{
|
9432 |
+
"epoch": 1.5652850143868107,
|
9433 |
+
"grad_norm": 34.32064437866211,
|
9434 |
+
"learning_rate": 2.521280483231678e-08,
|
9435 |
+
"logits/chosen": -10.794498443603516,
|
9436 |
+
"logits/rejected": -10.793449401855469,
|
9437 |
+
"logps/chosen": -28.45277214050293,
|
9438 |
+
"logps/rejected": -65.07192993164062,
|
9439 |
+
"loss": 0.2844,
|
9440 |
+
"rewards/accuracies": 0.71875,
|
9441 |
+
"rewards/chosen": -0.8511688113212585,
|
9442 |
+
"rewards/margins": 3.4714159965515137,
|
9443 |
+
"rewards/rejected": -4.322584629058838,
|
9444 |
+
"step": 1258
|
9445 |
+
},
|
9446 |
+
{
|
9447 |
+
"epoch": 1.5677735438214482,
|
9448 |
+
"grad_norm": 4.81486701965332,
|
9449 |
+
"learning_rate": 2.4936494461872125e-08,
|
9450 |
+
"logits/chosen": -10.802877426147461,
|
9451 |
+
"logits/rejected": -10.80418586730957,
|
9452 |
+
"logps/chosen": -26.037837982177734,
|
9453 |
+
"logps/rejected": -66.15322875976562,
|
9454 |
+
"loss": 0.2786,
|
9455 |
+
"rewards/accuracies": 0.71875,
|
9456 |
+
"rewards/chosen": -0.6298637390136719,
|
9457 |
+
"rewards/margins": 3.9220423698425293,
|
9458 |
+
"rewards/rejected": -4.551905632019043,
|
9459 |
+
"step": 1260
|
9460 |
+
},
|
9461 |
+
{
|
9462 |
+
"epoch": 1.5702620732560852,
|
9463 |
+
"grad_norm": 10.898475646972656,
|
9464 |
+
"learning_rate": 2.4661490677442832e-08,
|
9465 |
+
"logits/chosen": -10.785269737243652,
|
9466 |
+
"logits/rejected": -10.790578842163086,
|
9467 |
+
"logps/chosen": -26.191368103027344,
|
9468 |
+
"logps/rejected": -51.842777252197266,
|
9469 |
+
"loss": 0.3908,
|
9470 |
+
"rewards/accuracies": 0.78125,
|
9471 |
+
"rewards/chosen": -0.6927503943443298,
|
9472 |
+
"rewards/margins": 2.401498794555664,
|
9473 |
+
"rewards/rejected": -3.0942492485046387,
|
9474 |
+
"step": 1262
|
9475 |
+
},
|
9476 |
+
{
|
9477 |
+
"epoch": 1.5727506026907223,
|
9478 |
+
"grad_norm": 12.538436889648438,
|
9479 |
+
"learning_rate": 2.4387798265857075e-08,
|
9480 |
+
"logits/chosen": -10.842425346374512,
|
9481 |
+
"logits/rejected": -10.846158981323242,
|
9482 |
+
"logps/chosen": -28.756683349609375,
|
9483 |
+
"logps/rejected": -64.9261245727539,
|
9484 |
+
"loss": 0.3156,
|
9485 |
+
"rewards/accuracies": 0.625,
|
9486 |
+
"rewards/chosen": -0.8623592257499695,
|
9487 |
+
"rewards/margins": 3.4886474609375,
|
9488 |
+
"rewards/rejected": -4.351006984710693,
|
9489 |
+
"step": 1264
|
9490 |
+
},
|
9491 |
+
{
|
9492 |
+
"epoch": 1.5752391321253598,
|
9493 |
+
"grad_norm": 12.948237419128418,
|
9494 |
+
"learning_rate": 2.4115421991116603e-08,
|
9495 |
+
"logits/chosen": -10.809388160705566,
|
9496 |
+
"logits/rejected": -10.816155433654785,
|
9497 |
+
"logps/chosen": -26.006319046020508,
|
9498 |
+
"logps/rejected": -77.81361389160156,
|
9499 |
+
"loss": 0.2467,
|
9500 |
+
"rewards/accuracies": 0.84375,
|
9501 |
+
"rewards/chosen": -0.5785080194473267,
|
9502 |
+
"rewards/margins": 5.077298641204834,
|
9503 |
+
"rewards/rejected": -5.655807018280029,
|
9504 |
+
"step": 1266
|
9505 |
+
},
|
9506 |
+
{
|
9507 |
+
"epoch": 1.5777276615599969,
|
9508 |
+
"grad_norm": 43.29991149902344,
|
9509 |
+
"learning_rate": 2.3844366594314092e-08,
|
9510 |
+
"logits/chosen": -10.77228832244873,
|
9511 |
+
"logits/rejected": -10.772639274597168,
|
9512 |
+
"logps/chosen": -20.349361419677734,
|
9513 |
+
"logps/rejected": -52.78181076049805,
|
9514 |
+
"loss": 0.3604,
|
9515 |
+
"rewards/accuracies": 0.65625,
|
9516 |
+
"rewards/chosen": -0.04957125708460808,
|
9517 |
+
"rewards/margins": 3.16991925239563,
|
9518 |
+
"rewards/rejected": -3.2194907665252686,
|
9519 |
+
"step": 1268
|
9520 |
+
},
|
9521 |
+
{
|
9522 |
+
"epoch": 1.580216190994634,
|
9523 |
+
"grad_norm": 5.082618236541748,
|
9524 |
+
"learning_rate": 2.3574636793550375e-08,
|
9525 |
+
"logits/chosen": -10.785139083862305,
|
9526 |
+
"logits/rejected": -10.78280258178711,
|
9527 |
+
"logps/chosen": -25.451231002807617,
|
9528 |
+
"logps/rejected": -53.280418395996094,
|
9529 |
+
"loss": 0.352,
|
9530 |
+
"rewards/accuracies": 0.5625,
|
9531 |
+
"rewards/chosen": -0.5985268354415894,
|
9532 |
+
"rewards/margins": 2.6375532150268555,
|
9533 |
+
"rewards/rejected": -3.2360801696777344,
|
9534 |
+
"step": 1270
|
9535 |
+
},
|
9536 |
+
{
|
9537 |
+
"epoch": 1.5827047204292715,
|
9538 |
+
"grad_norm": 141.70098876953125,
|
9539 |
+
"learning_rate": 2.330623728385246e-08,
|
9540 |
+
"logits/chosen": -10.739248275756836,
|
9541 |
+
"logits/rejected": -10.738280296325684,
|
9542 |
+
"logps/chosen": -24.47553253173828,
|
9543 |
+
"logps/rejected": -61.69512939453125,
|
9544 |
+
"loss": 0.4036,
|
9545 |
+
"rewards/accuracies": 0.71875,
|
9546 |
+
"rewards/chosen": -0.4863220751285553,
|
9547 |
+
"rewards/margins": 3.448641300201416,
|
9548 |
+
"rewards/rejected": -3.9349637031555176,
|
9549 |
+
"step": 1272
|
9550 |
+
},
|
9551 |
+
{
|
9552 |
+
"epoch": 1.5851932498639085,
|
9553 |
+
"grad_norm": 6.310169219970703,
|
9554 |
+
"learning_rate": 2.3039172737091807e-08,
|
9555 |
+
"logits/chosen": -10.816719055175781,
|
9556 |
+
"logits/rejected": -10.82149887084961,
|
9557 |
+
"logps/chosen": -23.517425537109375,
|
9558 |
+
"logps/rejected": -44.70637512207031,
|
9559 |
+
"loss": 0.3769,
|
9560 |
+
"rewards/accuracies": 0.59375,
|
9561 |
+
"rewards/chosen": -0.39209693670272827,
|
9562 |
+
"rewards/margins": 2.0814270973205566,
|
9563 |
+
"rewards/rejected": -2.4735240936279297,
|
9564 |
+
"step": 1274
|
9565 |
+
},
|
9566 |
+
{
|
9567 |
+
"epoch": 1.5876817792985458,
|
9568 |
+
"grad_norm": 13.863208770751953,
|
9569 |
+
"learning_rate": 2.2773447801902855e-08,
|
9570 |
+
"logits/chosen": -10.832393646240234,
|
9571 |
+
"logits/rejected": -10.82877254486084,
|
9572 |
+
"logps/chosen": -18.8774471282959,
|
9573 |
+
"logps/rejected": -49.65419006347656,
|
9574 |
+
"loss": 0.354,
|
9575 |
+
"rewards/accuracies": 0.6875,
|
9576 |
+
"rewards/chosen": 0.06631821393966675,
|
9577 |
+
"rewards/margins": 2.8835067749023438,
|
9578 |
+
"rewards/rejected": -2.8171889781951904,
|
9579 |
+
"step": 1276
|
9580 |
+
},
|
9581 |
+
{
|
9582 |
+
"epoch": 1.590170308733183,
|
9583 |
+
"grad_norm": 15.649863243103027,
|
9584 |
+
"learning_rate": 2.250906710360235e-08,
|
9585 |
+
"logits/chosen": -10.807013511657715,
|
9586 |
+
"logits/rejected": -10.8053560256958,
|
9587 |
+
"logps/chosen": -22.7423038482666,
|
9588 |
+
"logps/rejected": -49.1067008972168,
|
9589 |
+
"loss": 0.3119,
|
9590 |
+
"rewards/accuracies": 0.59375,
|
9591 |
+
"rewards/chosen": -0.3249521255493164,
|
9592 |
+
"rewards/margins": 2.519008159637451,
|
9593 |
+
"rewards/rejected": -2.8439598083496094,
|
9594 |
+
"step": 1278
|
9595 |
+
},
|
9596 |
+
{
|
9597 |
+
"epoch": 1.5926588381678202,
|
9598 |
+
"grad_norm": 15.802570343017578,
|
9599 |
+
"learning_rate": 2.2246035244108586e-08,
|
9600 |
+
"logits/chosen": -10.775257110595703,
|
9601 |
+
"logits/rejected": -10.774992942810059,
|
9602 |
+
"logps/chosen": -20.724456787109375,
|
9603 |
+
"logps/rejected": -60.901023864746094,
|
9604 |
+
"loss": 0.3112,
|
9605 |
+
"rewards/accuracies": 0.75,
|
9606 |
+
"rewards/chosen": -0.12378637492656708,
|
9607 |
+
"rewards/margins": 3.8235392570495605,
|
9608 |
+
"rewards/rejected": -3.9473254680633545,
|
9609 |
+
"step": 1280
|
9610 |
+
},
|
9611 |
+
{
|
9612 |
+
"epoch": 1.5951473676024575,
|
9613 |
+
"grad_norm": 18.153547286987305,
|
9614 |
+
"learning_rate": 2.1984356801861502e-08,
|
9615 |
+
"logits/chosen": -10.780445098876953,
|
9616 |
+
"logits/rejected": -10.779111862182617,
|
9617 |
+
"logps/chosen": -22.182926177978516,
|
9618 |
+
"logps/rejected": -62.560401916503906,
|
9619 |
+
"loss": 0.2616,
|
9620 |
+
"rewards/accuracies": 0.65625,
|
9621 |
+
"rewards/chosen": -0.21629738807678223,
|
9622 |
+
"rewards/margins": 3.8685829639434814,
|
9623 |
+
"rewards/rejected": -4.084880352020264,
|
9624 |
+
"step": 1282
|
9625 |
+
},
|
9626 |
+
{
|
9627 |
+
"epoch": 1.5976358970370947,
|
9628 |
+
"grad_norm": 5.7486443519592285,
|
9629 |
+
"learning_rate": 2.1724036331742834e-08,
|
9630 |
+
"logits/chosen": -10.770216941833496,
|
9631 |
+
"logits/rejected": -10.772773742675781,
|
9632 |
+
"logps/chosen": -21.333688735961914,
|
9633 |
+
"logps/rejected": -49.32101058959961,
|
9634 |
+
"loss": 0.3264,
|
9635 |
+
"rewards/accuracies": 0.6875,
|
9636 |
+
"rewards/chosen": -0.12258227914571762,
|
9637 |
+
"rewards/margins": 2.6417171955108643,
|
9638 |
+
"rewards/rejected": -2.7642996311187744,
|
9639 |
+
"step": 1284
|
9640 |
+
},
|
9641 |
+
{
|
9642 |
+
"epoch": 1.6001244264717318,
|
9643 |
+
"grad_norm": 72.83959197998047,
|
9644 |
+
"learning_rate": 2.1465078364996968e-08,
|
9645 |
+
"logits/chosen": -10.802704811096191,
|
9646 |
+
"logits/rejected": -10.801321029663086,
|
9647 |
+
"logps/chosen": -23.669212341308594,
|
9648 |
+
"logps/rejected": -56.59158706665039,
|
9649 |
+
"loss": 0.3146,
|
9650 |
+
"rewards/accuracies": 0.71875,
|
9651 |
+
"rewards/chosen": -0.41491052508354187,
|
9652 |
+
"rewards/margins": 3.080634117126465,
|
9653 |
+
"rewards/rejected": -3.495544672012329,
|
9654 |
+
"step": 1286
|
9655 |
+
},
|
9656 |
+
{
|
9657 |
+
"epoch": 1.602612955906369,
|
9658 |
+
"grad_norm": 34.2864875793457,
|
9659 |
+
"learning_rate": 2.120748740915198e-08,
|
9660 |
+
"logits/chosen": -10.832742691040039,
|
9661 |
+
"logits/rejected": -10.830432891845703,
|
9662 |
+
"logps/chosen": -21.441856384277344,
|
9663 |
+
"logps/rejected": -63.40147399902344,
|
9664 |
+
"loss": 0.2765,
|
9665 |
+
"rewards/accuracies": 0.78125,
|
9666 |
+
"rewards/chosen": -0.2095116674900055,
|
9667 |
+
"rewards/margins": 3.9784107208251953,
|
9668 |
+
"rewards/rejected": -4.187922477722168,
|
9669 |
+
"step": 1288
|
9670 |
+
},
|
9671 |
+
{
|
9672 |
+
"epoch": 1.6051014853410064,
|
9673 |
+
"grad_norm": 9.747991561889648,
|
9674 |
+
"learning_rate": 2.0951267947941143e-08,
|
9675 |
+
"logits/chosen": -10.8104248046875,
|
9676 |
+
"logits/rejected": -10.815136909484863,
|
9677 |
+
"logps/chosen": -23.734474182128906,
|
9678 |
+
"logps/rejected": -60.64533233642578,
|
9679 |
+
"loss": 0.3503,
|
9680 |
+
"rewards/accuracies": 0.65625,
|
9681 |
+
"rewards/chosen": -0.4252595007419586,
|
9682 |
+
"rewards/margins": 3.603969097137451,
|
9683 |
+
"rewards/rejected": -4.029229164123535,
|
9684 |
+
"step": 1290
|
9685 |
+
},
|
9686 |
+
{
|
9687 |
+
"epoch": 1.6075900147756434,
|
9688 |
+
"grad_norm": 4.882062911987305,
|
9689 |
+
"learning_rate": 2.0696424441225036e-08,
|
9690 |
+
"logits/chosen": -10.807500839233398,
|
9691 |
+
"logits/rejected": -10.805845260620117,
|
9692 |
+
"logps/chosen": -21.90993309020996,
|
9693 |
+
"logps/rejected": -65.13987731933594,
|
9694 |
+
"loss": 0.24,
|
9695 |
+
"rewards/accuracies": 0.75,
|
9696 |
+
"rewards/chosen": -0.2415923923254013,
|
9697 |
+
"rewards/margins": 4.038329124450684,
|
9698 |
+
"rewards/rejected": -4.279921531677246,
|
9699 |
+
"step": 1292
|
9700 |
+
},
|
9701 |
+
{
|
9702 |
+
"epoch": 1.6100785442102807,
|
9703 |
+
"grad_norm": 3.259903907775879,
|
9704 |
+
"learning_rate": 2.0442961324913686e-08,
|
9705 |
+
"logits/chosen": -10.808277130126953,
|
9706 |
+
"logits/rejected": -10.814513206481934,
|
9707 |
+
"logps/chosen": -21.407821655273438,
|
9708 |
+
"logps/rejected": -55.829368591308594,
|
9709 |
+
"loss": 0.3427,
|
9710 |
+
"rewards/accuracies": 0.75,
|
9711 |
+
"rewards/chosen": -0.20267558097839355,
|
9712 |
+
"rewards/margins": 3.1913931369781494,
|
9713 |
+
"rewards/rejected": -3.394068717956543,
|
9714 |
+
"step": 1294
|
9715 |
+
},
|
9716 |
+
{
|
9717 |
+
"epoch": 1.612567073644918,
|
9718 |
+
"grad_norm": 9.57787036895752,
|
9719 |
+
"learning_rate": 2.0190883010889615e-08,
|
9720 |
+
"logits/chosen": -10.820773124694824,
|
9721 |
+
"logits/rejected": -10.818120956420898,
|
9722 |
+
"logps/chosen": -15.357433319091797,
|
9723 |
+
"logps/rejected": -41.842857360839844,
|
9724 |
+
"loss": 0.3415,
|
9725 |
+
"rewards/accuracies": 0.71875,
|
9726 |
+
"rewards/chosen": 0.40116557478904724,
|
9727 |
+
"rewards/margins": 2.474684715270996,
|
9728 |
+
"rewards/rejected": -2.073519229888916,
|
9729 |
+
"step": 1296
|
9730 |
+
},
|
9731 |
+
{
|
9732 |
+
"epoch": 1.615055603079555,
|
9733 |
+
"grad_norm": 8.003713607788086,
|
9734 |
+
"learning_rate": 1.9940193886930777e-08,
|
9735 |
+
"logits/chosen": -10.783833503723145,
|
9736 |
+
"logits/rejected": -10.784221649169922,
|
9737 |
+
"logps/chosen": -24.231639862060547,
|
9738 |
+
"logps/rejected": -71.94761657714844,
|
9739 |
+
"loss": 0.2483,
|
9740 |
+
"rewards/accuracies": 0.8125,
|
9741 |
+
"rewards/chosen": -0.4585307240486145,
|
9742 |
+
"rewards/margins": 4.653621673583984,
|
9743 |
+
"rewards/rejected": -5.112152576446533,
|
9744 |
+
"step": 1298
|
9745 |
+
},
|
9746 |
+
{
|
9747 |
+
"epoch": 1.6175441325141924,
|
9748 |
+
"grad_norm": 8.922944068908691,
|
9749 |
+
"learning_rate": 1.969089831663443e-08,
|
9750 |
+
"logits/chosen": -10.830660820007324,
|
9751 |
+
"logits/rejected": -10.817536354064941,
|
9752 |
+
"logps/chosen": -23.25543785095215,
|
9753 |
+
"logps/rejected": -53.022544860839844,
|
9754 |
+
"loss": 0.3277,
|
9755 |
+
"rewards/accuracies": 0.65625,
|
9756 |
+
"rewards/chosen": -0.34474945068359375,
|
9757 |
+
"rewards/margins": 2.839651346206665,
|
9758 |
+
"rewards/rejected": -3.1844005584716797,
|
9759 |
+
"step": 1300
|
9760 |
+
},
|
9761 |
+
{
|
9762 |
+
"epoch": 1.6200326619488297,
|
9763 |
+
"grad_norm": 9.474493980407715,
|
9764 |
+
"learning_rate": 1.9443000639341045e-08,
|
9765 |
+
"logits/chosen": -10.812176704406738,
|
9766 |
+
"logits/rejected": -10.77513599395752,
|
9767 |
+
"logps/chosen": -20.876327514648438,
|
9768 |
+
"logps/rejected": -52.44502258300781,
|
9769 |
+
"loss": 0.2945,
|
9770 |
+
"rewards/accuracies": 0.5625,
|
9771 |
+
"rewards/chosen": -0.11885665357112885,
|
9772 |
+
"rewards/margins": 3.071859121322632,
|
9773 |
+
"rewards/rejected": -3.190715789794922,
|
9774 |
+
"step": 1302
|
9775 |
+
},
|
9776 |
+
{
|
9777 |
+
"epoch": 1.6225211913834667,
|
9778 |
+
"grad_norm": 24.406034469604492,
|
9779 |
+
"learning_rate": 1.919650517005872e-08,
|
9780 |
+
"logits/chosen": -10.835334777832031,
|
9781 |
+
"logits/rejected": -10.845525741577148,
|
9782 |
+
"logps/chosen": -28.61736297607422,
|
9783 |
+
"logps/rejected": -68.78744506835938,
|
9784 |
+
"loss": 0.3318,
|
9785 |
+
"rewards/accuracies": 0.625,
|
9786 |
+
"rewards/chosen": -0.8773050308227539,
|
9787 |
+
"rewards/margins": 3.880265712738037,
|
9788 |
+
"rewards/rejected": -4.757571220397949,
|
9789 |
+
"step": 1304
|
9790 |
+
},
|
9791 |
+
{
|
9792 |
+
"epoch": 1.625009720818104,
|
9793 |
+
"grad_norm": 29.102094650268555,
|
9794 |
+
"learning_rate": 1.895141619938825e-08,
|
9795 |
+
"logits/chosen": -10.814085006713867,
|
9796 |
+
"logits/rejected": -10.81423568725586,
|
9797 |
+
"logps/chosen": -21.529321670532227,
|
9798 |
+
"logps/rejected": -57.59548568725586,
|
9799 |
+
"loss": 0.3405,
|
9800 |
+
"rewards/accuracies": 0.65625,
|
9801 |
+
"rewards/chosen": -0.19233551621437073,
|
9802 |
+
"rewards/margins": 3.4261107444763184,
|
9803 |
+
"rewards/rejected": -3.618446111679077,
|
9804 |
+
"step": 1306
|
9805 |
+
},
|
9806 |
+
{
|
9807 |
+
"epoch": 1.6274982502527413,
|
9808 |
+
"grad_norm": 18.181053161621094,
|
9809 |
+
"learning_rate": 1.8707737993448247e-08,
|
9810 |
+
"logits/chosen": -10.777032852172852,
|
9811 |
+
"logits/rejected": -10.783935546875,
|
9812 |
+
"logps/chosen": -21.770933151245117,
|
9813 |
+
"logps/rejected": -70.67555236816406,
|
9814 |
+
"loss": 0.2861,
|
9815 |
+
"rewards/accuracies": 0.75,
|
9816 |
+
"rewards/chosen": -0.20613786578178406,
|
9817 |
+
"rewards/margins": 4.698180675506592,
|
9818 |
+
"rewards/rejected": -4.904318332672119,
|
9819 |
+
"step": 1308
|
9820 |
+
},
|
9821 |
+
{
|
9822 |
+
"epoch": 1.6299867796873784,
|
9823 |
+
"grad_norm": 18.842073440551758,
|
9824 |
+
"learning_rate": 1.8465474793801085e-08,
|
9825 |
+
"logits/chosen": -10.790057182312012,
|
9826 |
+
"logits/rejected": -10.789998054504395,
|
9827 |
+
"logps/chosen": -25.611600875854492,
|
9828 |
+
"logps/rejected": -61.528900146484375,
|
9829 |
+
"loss": 0.3441,
|
9830 |
+
"rewards/accuracies": 0.5625,
|
9831 |
+
"rewards/chosen": -0.5731645226478577,
|
9832 |
+
"rewards/margins": 3.542222499847412,
|
9833 |
+
"rewards/rejected": -4.115387439727783,
|
9834 |
+
"step": 1310
|
9835 |
+
},
|
9836 |
+
{
|
9837 |
+
"epoch": 1.6324753091220157,
|
9838 |
+
"grad_norm": 2.9410083293914795,
|
9839 |
+
"learning_rate": 1.8224630817378827e-08,
|
9840 |
+
"logits/chosen": -10.817235946655273,
|
9841 |
+
"logits/rejected": -10.816126823425293,
|
9842 |
+
"logps/chosen": -23.82830047607422,
|
9843 |
+
"logps/rejected": -59.01426696777344,
|
9844 |
+
"loss": 0.2962,
|
9845 |
+
"rewards/accuracies": 0.65625,
|
9846 |
+
"rewards/chosen": -0.39294612407684326,
|
9847 |
+
"rewards/margins": 3.3668603897094727,
|
9848 |
+
"rewards/rejected": -3.7598063945770264,
|
9849 |
+
"step": 1312
|
9850 |
+
},
|
9851 |
+
{
|
9852 |
+
"epoch": 1.634963838556653,
|
9853 |
+
"grad_norm": 34.97274398803711,
|
9854 |
+
"learning_rate": 1.7985210256410088e-08,
|
9855 |
+
"logits/chosen": -10.75239372253418,
|
9856 |
+
"logits/rejected": -10.75178337097168,
|
9857 |
+
"logps/chosen": -26.019506454467773,
|
9858 |
+
"logps/rejected": -63.77971267700195,
|
9859 |
+
"loss": 0.4301,
|
9860 |
+
"rewards/accuracies": 0.65625,
|
9861 |
+
"rewards/chosen": -0.6273402571678162,
|
9862 |
+
"rewards/margins": 3.5424182415008545,
|
9863 |
+
"rewards/rejected": -4.1697587966918945,
|
9864 |
+
"step": 1314
|
9865 |
+
},
|
9866 |
+
{
|
9867 |
+
"epoch": 1.63745236799129,
|
9868 |
+
"grad_norm": 2.5021934509277344,
|
9869 |
+
"learning_rate": 1.7747217278346838e-08,
|
9870 |
+
"logits/chosen": -10.83252239227295,
|
9871 |
+
"logits/rejected": -10.830848693847656,
|
9872 |
+
"logps/chosen": -23.322322845458984,
|
9873 |
+
"logps/rejected": -55.16864013671875,
|
9874 |
+
"loss": 0.3096,
|
9875 |
+
"rewards/accuracies": 0.75,
|
9876 |
+
"rewards/chosen": -0.384496808052063,
|
9877 |
+
"rewards/margins": 2.9950833320617676,
|
9878 |
+
"rewards/rejected": -3.37958025932312,
|
9879 |
+
"step": 1316
|
9880 |
+
},
|
9881 |
+
{
|
9882 |
+
"epoch": 1.6399408974259275,
|
9883 |
+
"grad_norm": 33.00211715698242,
|
9884 |
+
"learning_rate": 1.7510656025792004e-08,
|
9885 |
+
"logits/chosen": -10.799005508422852,
|
9886 |
+
"logits/rejected": -10.798194885253906,
|
9887 |
+
"logps/chosen": -26.886280059814453,
|
9888 |
+
"logps/rejected": -58.32006072998047,
|
9889 |
+
"loss": 0.3002,
|
9890 |
+
"rewards/accuracies": 0.71875,
|
9891 |
+
"rewards/chosen": -0.7233449816703796,
|
9892 |
+
"rewards/margins": 3.111297130584717,
|
9893 |
+
"rewards/rejected": -3.8346424102783203,
|
9894 |
+
"step": 1318
|
9895 |
+
},
|
9896 |
+
{
|
9897 |
+
"epoch": 1.6424294268605646,
|
9898 |
+
"grad_norm": 7.195638179779053,
|
9899 |
+
"learning_rate": 1.7275530616427335e-08,
|
9900 |
+
"logits/chosen": -10.778608322143555,
|
9901 |
+
"logits/rejected": -10.783539772033691,
|
9902 |
+
"logps/chosen": -27.22859001159668,
|
9903 |
+
"logps/rejected": -60.26847839355469,
|
9904 |
+
"loss": 0.3174,
|
9905 |
+
"rewards/accuracies": 0.59375,
|
9906 |
+
"rewards/chosen": -0.7433955669403076,
|
9907 |
+
"rewards/margins": 3.1764450073242188,
|
9908 |
+
"rewards/rejected": -3.9198405742645264,
|
9909 |
+
"step": 1320
|
9910 |
+
},
|
9911 |
+
{
|
9912 |
+
"epoch": 1.6449179562952017,
|
9913 |
+
"grad_norm": 14.944016456604004,
|
9914 |
+
"learning_rate": 1.7041845142941612e-08,
|
9915 |
+
"logits/chosen": -10.831104278564453,
|
9916 |
+
"logits/rejected": -10.8374605178833,
|
9917 |
+
"logps/chosen": -22.72991371154785,
|
9918 |
+
"logps/rejected": -69.8420181274414,
|
9919 |
+
"loss": 0.317,
|
9920 |
+
"rewards/accuracies": 0.84375,
|
9921 |
+
"rewards/chosen": -0.288443922996521,
|
9922 |
+
"rewards/margins": 4.563740253448486,
|
9923 |
+
"rewards/rejected": -4.852183818817139,
|
9924 |
+
"step": 1322
|
9925 |
+
},
|
9926 |
+
{
|
9927 |
+
"epoch": 1.6474064857298392,
|
9928 |
+
"grad_norm": 17.540937423706055,
|
9929 |
+
"learning_rate": 1.6809603672959615e-08,
|
9930 |
+
"logits/chosen": -10.862117767333984,
|
9931 |
+
"logits/rejected": -10.859867095947266,
|
9932 |
+
"logps/chosen": -25.939712524414062,
|
9933 |
+
"logps/rejected": -51.00326156616211,
|
9934 |
+
"loss": 0.3688,
|
9935 |
+
"rewards/accuracies": 0.5,
|
9936 |
+
"rewards/chosen": -0.6566795110702515,
|
9937 |
+
"rewards/margins": 2.3946855068206787,
|
9938 |
+
"rewards/rejected": -3.051365375518799,
|
9939 |
+
"step": 1324
|
9940 |
+
},
|
9941 |
+
{
|
9942 |
+
"epoch": 1.6498950151644762,
|
9943 |
+
"grad_norm": 12.726731300354004,
|
9944 |
+
"learning_rate": 1.6578810248971142e-08,
|
9945 |
+
"logits/chosen": -10.80691909790039,
|
9946 |
+
"logits/rejected": -10.802922248840332,
|
9947 |
+
"logps/chosen": -20.270612716674805,
|
9948 |
+
"logps/rejected": -53.08592987060547,
|
9949 |
+
"loss": 0.329,
|
9950 |
+
"rewards/accuracies": 0.71875,
|
9951 |
+
"rewards/chosen": -0.04774314910173416,
|
9952 |
+
"rewards/margins": 3.1508731842041016,
|
9953 |
+
"rewards/rejected": -3.1986162662506104,
|
9954 |
+
"step": 1326
|
9955 |
+
},
|
9956 |
+
{
|
9957 |
+
"epoch": 1.6523835445991133,
|
9958 |
+
"grad_norm": 63.046714782714844,
|
9959 |
+
"learning_rate": 1.6349468888260764e-08,
|
9960 |
+
"logits/chosen": -10.805728912353516,
|
9961 |
+
"logits/rejected": -10.807948112487793,
|
9962 |
+
"logps/chosen": -22.131887435913086,
|
9963 |
+
"logps/rejected": -50.19975662231445,
|
9964 |
+
"loss": 0.3256,
|
9965 |
+
"rewards/accuracies": 0.625,
|
9966 |
+
"rewards/chosen": -0.2647934556007385,
|
9967 |
+
"rewards/margins": 2.623716354370117,
|
9968 |
+
"rewards/rejected": -2.88850998878479,
|
9969 |
+
"step": 1328
|
9970 |
+
},
|
9971 |
+
{
|
9972 |
+
"epoch": 1.6548720740337508,
|
9973 |
+
"grad_norm": 18.786680221557617,
|
9974 |
+
"learning_rate": 1.6121583582837772e-08,
|
9975 |
+
"logits/chosen": -10.854766845703125,
|
9976 |
+
"logits/rejected": -10.862516403198242,
|
9977 |
+
"logps/chosen": -25.881559371948242,
|
9978 |
+
"logps/rejected": -60.02587127685547,
|
9979 |
+
"loss": 0.3334,
|
9980 |
+
"rewards/accuracies": 0.59375,
|
9981 |
+
"rewards/chosen": -0.6450967788696289,
|
9982 |
+
"rewards/margins": 3.2605414390563965,
|
9983 |
+
"rewards/rejected": -3.9056379795074463,
|
9984 |
+
"step": 1330
|
9985 |
+
},
|
9986 |
+
{
|
9987 |
+
"epoch": 1.6573606034683879,
|
9988 |
+
"grad_norm": 5.466382026672363,
|
9989 |
+
"learning_rate": 1.589515829936684e-08,
|
9990 |
+
"logits/chosen": -10.840960502624512,
|
9991 |
+
"logits/rejected": -10.838695526123047,
|
9992 |
+
"logps/chosen": -21.14974594116211,
|
9993 |
+
"logps/rejected": -54.80947494506836,
|
9994 |
+
"loss": 0.3163,
|
9995 |
+
"rewards/accuracies": 0.625,
|
9996 |
+
"rewards/chosen": -0.16069187223911285,
|
9997 |
+
"rewards/margins": 3.2574622631073,
|
9998 |
+
"rewards/rejected": -3.418154239654541,
|
9999 |
+
"step": 1332
|
10000 |
+
},
|
10001 |
+
{
|
10002 |
+
"epoch": 1.6598491329030252,
|
10003 |
+
"grad_norm": 3.6651666164398193,
|
10004 |
+
"learning_rate": 1.5670196979098837e-08,
|
10005 |
+
"logits/chosen": -10.87842845916748,
|
10006 |
+
"logits/rejected": -10.880535125732422,
|
10007 |
+
"logps/chosen": -25.185527801513672,
|
10008 |
+
"logps/rejected": -65.58544921875,
|
10009 |
+
"loss": 0.3046,
|
10010 |
+
"rewards/accuracies": 0.53125,
|
10011 |
+
"rewards/chosen": -0.5343016386032104,
|
10012 |
+
"rewards/margins": 3.9803366661071777,
|
10013 |
+
"rewards/rejected": -4.514638423919678,
|
10014 |
+
"step": 1334
|
10015 |
+
},
|
10016 |
+
{
|
10017 |
+
"epoch": 1.6623376623376624,
|
10018 |
+
"grad_norm": 22.037355422973633,
|
10019 |
+
"learning_rate": 1.5446703537802342e-08,
|
10020 |
+
"logits/chosen": -10.828059196472168,
|
10021 |
+
"logits/rejected": -10.813085556030273,
|
10022 |
+
"logps/chosen": -24.3900089263916,
|
10023 |
+
"logps/rejected": -57.214691162109375,
|
10024 |
+
"loss": 0.3129,
|
10025 |
+
"rewards/accuracies": 0.75,
|
10026 |
+
"rewards/chosen": -0.4744144380092621,
|
10027 |
+
"rewards/margins": 3.0794365406036377,
|
10028 |
+
"rewards/rejected": -3.5538506507873535,
|
10029 |
+
"step": 1336
|
10030 |
+
},
|
10031 |
+
{
|
10032 |
+
"epoch": 1.6648261917722995,
|
10033 |
+
"grad_norm": 4.078120231628418,
|
10034 |
+
"learning_rate": 1.5224681865695422e-08,
|
10035 |
+
"logits/chosen": -10.837578773498535,
|
10036 |
+
"logits/rejected": -10.836234092712402,
|
10037 |
+
"logps/chosen": -22.567678451538086,
|
10038 |
+
"logps/rejected": -62.71944808959961,
|
10039 |
+
"loss": 0.3147,
|
10040 |
+
"rewards/accuracies": 0.71875,
|
10041 |
+
"rewards/chosen": -0.32626694440841675,
|
10042 |
+
"rewards/margins": 3.8645825386047363,
|
10043 |
+
"rewards/rejected": -4.190849304199219,
|
10044 |
+
"step": 1338
|
10045 |
+
},
|
10046 |
+
{
|
10047 |
+
"epoch": 1.6673147212069368,
|
10048 |
+
"grad_norm": 12.054242134094238,
|
10049 |
+
"learning_rate": 1.5004135827377905e-08,
|
10050 |
+
"logits/chosen": -10.865391731262207,
|
10051 |
+
"logits/rejected": -10.868090629577637,
|
10052 |
+
"logps/chosen": -24.837133407592773,
|
10053 |
+
"logps/rejected": -87.59353637695312,
|
10054 |
+
"loss": 0.2993,
|
10055 |
+
"rewards/accuracies": 0.78125,
|
10056 |
+
"rewards/chosen": -0.4641028940677643,
|
10057 |
+
"rewards/margins": 6.047280311584473,
|
10058 |
+
"rewards/rejected": -6.511383533477783,
|
10059 |
+
"step": 1340
|
10060 |
+
},
|
10061 |
+
{
|
10062 |
+
"epoch": 1.669803250641574,
|
10063 |
+
"grad_norm": 33.8203125,
|
10064 |
+
"learning_rate": 1.4785069261764182e-08,
|
10065 |
+
"logits/chosen": -10.816351890563965,
|
10066 |
+
"logits/rejected": -10.816112518310547,
|
10067 |
+
"logps/chosen": -27.667259216308594,
|
10068 |
+
"logps/rejected": -46.164127349853516,
|
10069 |
+
"loss": 0.331,
|
10070 |
+
"rewards/accuracies": 0.46875,
|
10071 |
+
"rewards/chosen": -0.7790037393569946,
|
10072 |
+
"rewards/margins": 1.784177303314209,
|
10073 |
+
"rewards/rejected": -2.563180923461914,
|
10074 |
+
"step": 1342
|
10075 |
+
},
|
10076 |
+
{
|
10077 |
+
"epoch": 1.6722917800762112,
|
10078 |
+
"grad_norm": 5.524724960327148,
|
10079 |
+
"learning_rate": 1.4567485982016258e-08,
|
10080 |
+
"logits/chosen": -10.838247299194336,
|
10081 |
+
"logits/rejected": -10.837743759155273,
|
10082 |
+
"logps/chosen": -24.21609878540039,
|
10083 |
+
"logps/rejected": -47.889774322509766,
|
10084 |
+
"loss": 0.3116,
|
10085 |
+
"rewards/accuracies": 0.5,
|
10086 |
+
"rewards/chosen": -0.45775747299194336,
|
10087 |
+
"rewards/margins": 2.266995429992676,
|
10088 |
+
"rewards/rejected": -2.7247531414031982,
|
10089 |
+
"step": 1344
|
10090 |
+
},
|
10091 |
+
{
|
10092 |
+
"epoch": 1.6747803095108484,
|
10093 |
+
"grad_norm": 16.984832763671875,
|
10094 |
+
"learning_rate": 1.4351389775477573e-08,
|
10095 |
+
"logits/chosen": -10.84825611114502,
|
10096 |
+
"logits/rejected": -10.851517677307129,
|
10097 |
+
"logps/chosen": -28.410385131835938,
|
10098 |
+
"logps/rejected": -76.4227294921875,
|
10099 |
+
"loss": 0.2681,
|
10100 |
+
"rewards/accuracies": 0.84375,
|
10101 |
+
"rewards/chosen": -0.8531373143196106,
|
10102 |
+
"rewards/margins": 4.609513282775879,
|
10103 |
+
"rewards/rejected": -5.462650299072266,
|
10104 |
+
"step": 1346
|
10105 |
+
},
|
10106 |
+
{
|
10107 |
+
"epoch": 1.6772688389454857,
|
10108 |
+
"grad_norm": 7.401856422424316,
|
10109 |
+
"learning_rate": 1.4136784403606839e-08,
|
10110 |
+
"logits/chosen": -10.8283109664917,
|
10111 |
+
"logits/rejected": -10.829707145690918,
|
10112 |
+
"logps/chosen": -26.35080337524414,
|
10113 |
+
"logps/rejected": -65.35711669921875,
|
10114 |
+
"loss": 0.2665,
|
10115 |
+
"rewards/accuracies": 0.71875,
|
10116 |
+
"rewards/chosen": -0.6642946004867554,
|
10117 |
+
"rewards/margins": 3.7643990516662598,
|
10118 |
+
"rewards/rejected": -4.428694248199463,
|
10119 |
+
"step": 1348
|
10120 |
+
},
|
10121 |
+
{
|
10122 |
+
"epoch": 1.6797573683801228,
|
10123 |
+
"grad_norm": 6.8130621910095215,
|
10124 |
+
"learning_rate": 1.3923673601912777e-08,
|
10125 |
+
"logits/chosen": -10.753373146057129,
|
10126 |
+
"logits/rejected": -10.752891540527344,
|
10127 |
+
"logps/chosen": -25.6737060546875,
|
10128 |
+
"logps/rejected": -65.83049011230469,
|
10129 |
+
"loss": 0.3014,
|
10130 |
+
"rewards/accuracies": 0.78125,
|
10131 |
+
"rewards/chosen": -0.5438342094421387,
|
10132 |
+
"rewards/margins": 3.9245076179504395,
|
10133 |
+
"rewards/rejected": -4.468341827392578,
|
10134 |
+
"step": 1350
|
10135 |
+
},
|
10136 |
+
{
|
10137 |
+
"epoch": 1.68224589781476,
|
10138 |
+
"grad_norm": 20.65499496459961,
|
10139 |
+
"learning_rate": 1.3712061079889014e-08,
|
10140 |
+
"logits/chosen": -10.788976669311523,
|
10141 |
+
"logits/rejected": -10.791670799255371,
|
10142 |
+
"logps/chosen": -22.58700942993164,
|
10143 |
+
"logps/rejected": -53.30634689331055,
|
10144 |
+
"loss": 0.3424,
|
10145 |
+
"rewards/accuracies": 0.65625,
|
10146 |
+
"rewards/chosen": -0.31589654088020325,
|
10147 |
+
"rewards/margins": 2.8593664169311523,
|
10148 |
+
"rewards/rejected": -3.175262928009033,
|
10149 |
+
"step": 1352
|
10150 |
+
},
|
10151 |
+
{
|
10152 |
+
"epoch": 1.6847344272493974,
|
10153 |
+
"grad_norm": 11.894336700439453,
|
10154 |
+
"learning_rate": 1.3501950520949434e-08,
|
10155 |
+
"logits/chosen": -10.869431495666504,
|
10156 |
+
"logits/rejected": -10.864688873291016,
|
10157 |
+
"logps/chosen": -24.943134307861328,
|
10158 |
+
"logps/rejected": -54.439842224121094,
|
10159 |
+
"loss": 0.3573,
|
10160 |
+
"rewards/accuracies": 0.71875,
|
10161 |
+
"rewards/chosen": -0.5166460275650024,
|
10162 |
+
"rewards/margins": 2.882528781890869,
|
10163 |
+
"rewards/rejected": -3.3991751670837402,
|
10164 |
+
"step": 1354
|
10165 |
+
},
|
10166 |
+
{
|
10167 |
+
"epoch": 1.6872229566840344,
|
10168 |
+
"grad_norm": 8.13518238067627,
|
10169 |
+
"learning_rate": 1.3293345582364224e-08,
|
10170 |
+
"logits/chosen": -10.843225479125977,
|
10171 |
+
"logits/rejected": -10.838338851928711,
|
10172 |
+
"logps/chosen": -23.392269134521484,
|
10173 |
+
"logps/rejected": -65.21499633789062,
|
10174 |
+
"loss": 0.3396,
|
10175 |
+
"rewards/accuracies": 0.71875,
|
10176 |
+
"rewards/chosen": -0.34925830364227295,
|
10177 |
+
"rewards/margins": 4.064693927764893,
|
10178 |
+
"rewards/rejected": -4.413951873779297,
|
10179 |
+
"step": 1356
|
10180 |
+
},
|
10181 |
+
{
|
10182 |
+
"epoch": 1.6897114861186717,
|
10183 |
+
"grad_norm": 5.220905303955078,
|
10184 |
+
"learning_rate": 1.3086249895196043e-08,
|
10185 |
+
"logits/chosen": -10.816040992736816,
|
10186 |
+
"logits/rejected": -10.818885803222656,
|
10187 |
+
"logps/chosen": -21.512014389038086,
|
10188 |
+
"logps/rejected": -65.09014129638672,
|
10189 |
+
"loss": 0.3114,
|
10190 |
+
"rewards/accuracies": 0.6875,
|
10191 |
+
"rewards/chosen": -0.15071852505207062,
|
10192 |
+
"rewards/margins": 4.254648208618164,
|
10193 |
+
"rewards/rejected": -4.40536642074585,
|
10194 |
+
"step": 1358
|
10195 |
+
},
|
10196 |
+
{
|
10197 |
+
"epoch": 1.692200015553309,
|
10198 |
+
"grad_norm": 85.90353393554688,
|
10199 |
+
"learning_rate": 1.2880667064237004e-08,
|
10200 |
+
"logits/chosen": -10.824080467224121,
|
10201 |
+
"logits/rejected": -10.832862854003906,
|
10202 |
+
"logps/chosen": -23.073331832885742,
|
10203 |
+
"logps/rejected": -58.96718978881836,
|
10204 |
+
"loss": 0.3724,
|
10205 |
+
"rewards/accuracies": 0.5,
|
10206 |
+
"rewards/chosen": -0.34528830647468567,
|
10207 |
+
"rewards/margins": 3.405324935913086,
|
10208 |
+
"rewards/rejected": -3.750613212585449,
|
10209 |
+
"step": 1360
|
10210 |
+
},
|
10211 |
+
{
|
10212 |
+
"epoch": 1.694688544987946,
|
10213 |
+
"grad_norm": 6.757018566131592,
|
10214 |
+
"learning_rate": 1.2676600667945714e-08,
|
10215 |
+
"logits/chosen": -10.774547576904297,
|
10216 |
+
"logits/rejected": -10.776646614074707,
|
10217 |
+
"logps/chosen": -27.236501693725586,
|
10218 |
+
"logps/rejected": -47.35329055786133,
|
10219 |
+
"loss": 0.3518,
|
10220 |
+
"rewards/accuracies": 0.65625,
|
10221 |
+
"rewards/chosen": -0.6959034204483032,
|
10222 |
+
"rewards/margins": 2.0354065895080566,
|
10223 |
+
"rewards/rejected": -2.7313101291656494,
|
10224 |
+
"step": 1362
|
10225 |
+
},
|
10226 |
+
{
|
10227 |
+
"epoch": 1.6971770744225834,
|
10228 |
+
"grad_norm": 6.0155487060546875,
|
10229 |
+
"learning_rate": 1.2474054258385225e-08,
|
10230 |
+
"logits/chosen": -10.811131477355957,
|
10231 |
+
"logits/rejected": -10.817914962768555,
|
10232 |
+
"logps/chosen": -29.25282859802246,
|
10233 |
+
"logps/rejected": -74.38288879394531,
|
10234 |
+
"loss": 0.276,
|
10235 |
+
"rewards/accuracies": 0.9375,
|
10236 |
+
"rewards/chosen": -0.927348256111145,
|
10237 |
+
"rewards/margins": 4.406147003173828,
|
10238 |
+
"rewards/rejected": -5.333494663238525,
|
10239 |
+
"step": 1364
|
10240 |
+
},
|
10241 |
+
{
|
10242 |
+
"epoch": 1.6996656038572207,
|
10243 |
+
"grad_norm": 60.83378601074219,
|
10244 |
+
"learning_rate": 1.2273031361160957e-08,
|
10245 |
+
"logits/chosen": -10.810754776000977,
|
10246 |
+
"logits/rejected": -10.807637214660645,
|
10247 |
+
"logps/chosen": -25.723983764648438,
|
10248 |
+
"logps/rejected": -63.92774200439453,
|
10249 |
+
"loss": 0.3274,
|
10250 |
+
"rewards/accuracies": 0.78125,
|
10251 |
+
"rewards/chosen": -0.5800298452377319,
|
10252 |
+
"rewards/margins": 3.7809581756591797,
|
10253 |
+
"rewards/rejected": -4.360988140106201,
|
10254 |
+
"step": 1366
|
10255 |
+
},
|
10256 |
+
{
|
10257 |
+
"epoch": 1.7021541332918577,
|
10258 |
+
"grad_norm": 5.306698799133301,
|
10259 |
+
"learning_rate": 1.207353547535953e-08,
|
10260 |
+
"logits/chosen": -10.826414108276367,
|
10261 |
+
"logits/rejected": -10.828743934631348,
|
10262 |
+
"logps/chosen": -22.830368041992188,
|
10263 |
+
"logps/rejected": -55.398826599121094,
|
10264 |
+
"loss": 0.2879,
|
10265 |
+
"rewards/accuracies": 0.6875,
|
10266 |
+
"rewards/chosen": -0.31796175241470337,
|
10267 |
+
"rewards/margins": 3.1197590827941895,
|
10268 |
+
"rewards/rejected": -3.437720775604248,
|
10269 |
+
"step": 1368
|
10270 |
+
},
|
10271 |
+
{
|
10272 |
+
"epoch": 1.7046426627264952,
|
10273 |
+
"grad_norm": 51.56437301635742,
|
10274 |
+
"learning_rate": 1.1875570073487785e-08,
|
10275 |
+
"logits/chosen": -10.848183631896973,
|
10276 |
+
"logits/rejected": -10.847176551818848,
|
10277 |
+
"logps/chosen": -25.83056640625,
|
10278 |
+
"logps/rejected": -67.20542907714844,
|
10279 |
+
"loss": 0.2889,
|
10280 |
+
"rewards/accuracies": 0.8125,
|
10281 |
+
"rewards/chosen": -0.63761967420578,
|
10282 |
+
"rewards/margins": 3.966122627258301,
|
10283 |
+
"rewards/rejected": -4.603742599487305,
|
10284 |
+
"step": 1370
|
10285 |
+
},
|
10286 |
+
{
|
10287 |
+
"epoch": 1.7071311921611323,
|
10288 |
+
"grad_norm": 9.324451446533203,
|
10289 |
+
"learning_rate": 1.1679138601412253e-08,
|
10290 |
+
"logits/chosen": -10.814537048339844,
|
10291 |
+
"logits/rejected": -10.819119453430176,
|
10292 |
+
"logps/chosen": -21.487300872802734,
|
10293 |
+
"logps/rejected": -64.1480712890625,
|
10294 |
+
"loss": 0.3161,
|
10295 |
+
"rewards/accuracies": 0.625,
|
10296 |
+
"rewards/chosen": -0.13521310687065125,
|
10297 |
+
"rewards/margins": 4.159755706787109,
|
10298 |
+
"rewards/rejected": -4.294968605041504,
|
10299 |
+
"step": 1372
|
10300 |
+
},
|
10301 |
+
{
|
10302 |
+
"epoch": 1.7096197215957694,
|
10303 |
+
"grad_norm": 18.857152938842773,
|
10304 |
+
"learning_rate": 1.1484244478299366e-08,
|
10305 |
+
"logits/chosen": -10.806600570678711,
|
10306 |
+
"logits/rejected": -10.810232162475586,
|
10307 |
+
"logps/chosen": -22.422306060791016,
|
10308 |
+
"logps/rejected": -55.91395950317383,
|
10309 |
+
"loss": 0.342,
|
10310 |
+
"rewards/accuracies": 0.75,
|
10311 |
+
"rewards/chosen": -0.2938164174556732,
|
10312 |
+
"rewards/margins": 3.184936761856079,
|
10313 |
+
"rewards/rejected": -3.4787533283233643,
|
10314 |
+
"step": 1374
|
10315 |
+
},
|
10316 |
+
{
|
10317 |
+
"epoch": 1.7121082510304069,
|
10318 |
+
"grad_norm": 5.337169647216797,
|
10319 |
+
"learning_rate": 1.1290891096555744e-08,
|
10320 |
+
"logits/chosen": -10.812167167663574,
|
10321 |
+
"logits/rejected": -10.815803527832031,
|
10322 |
+
"logps/chosen": -22.376619338989258,
|
10323 |
+
"logps/rejected": -64.20872497558594,
|
10324 |
+
"loss": 0.3014,
|
10325 |
+
"rewards/accuracies": 0.75,
|
10326 |
+
"rewards/chosen": -0.24579763412475586,
|
10327 |
+
"rewards/margins": 4.0266289710998535,
|
10328 |
+
"rewards/rejected": -4.272426605224609,
|
10329 |
+
"step": 1376
|
10330 |
+
},
|
10331 |
+
{
|
10332 |
+
"epoch": 1.714596780465044,
|
10333 |
+
"grad_norm": 30.906925201416016,
|
10334 |
+
"learning_rate": 1.1099081821769296e-08,
|
10335 |
+
"logits/chosen": -10.828575134277344,
|
10336 |
+
"logits/rejected": -10.829512596130371,
|
10337 |
+
"logps/chosen": -24.83142852783203,
|
10338 |
+
"logps/rejected": -53.37904357910156,
|
10339 |
+
"loss": 0.2964,
|
10340 |
+
"rewards/accuracies": 0.65625,
|
10341 |
+
"rewards/chosen": -0.4947197735309601,
|
10342 |
+
"rewards/margins": 2.6709203720092773,
|
10343 |
+
"rewards/rejected": -3.165640354156494,
|
10344 |
+
"step": 1378
|
10345 |
+
},
|
10346 |
+
{
|
10347 |
+
"epoch": 1.717085309899681,
|
10348 |
+
"grad_norm": 11.849616050720215,
|
10349 |
+
"learning_rate": 1.090881999265051e-08,
|
10350 |
+
"logits/chosen": -10.887191772460938,
|
10351 |
+
"logits/rejected": -10.888714790344238,
|
10352 |
+
"logps/chosen": -29.812423706054688,
|
10353 |
+
"logps/rejected": -71.98737335205078,
|
10354 |
+
"loss": 0.2891,
|
10355 |
+
"rewards/accuracies": 0.78125,
|
10356 |
+
"rewards/chosen": -0.9166967868804932,
|
10357 |
+
"rewards/margins": 4.209402561187744,
|
10358 |
+
"rewards/rejected": -5.126099109649658,
|
10359 |
+
"step": 1380
|
10360 |
+
},
|
10361 |
+
{
|
10362 |
+
"epoch": 1.7195738393343185,
|
10363 |
+
"grad_norm": 12.149803161621094,
|
10364 |
+
"learning_rate": 1.0720108920974469e-08,
|
10365 |
+
"logits/chosen": -10.778029441833496,
|
10366 |
+
"logits/rejected": -10.781806945800781,
|
10367 |
+
"logps/chosen": -23.731386184692383,
|
10368 |
+
"logps/rejected": -68.31604766845703,
|
10369 |
+
"loss": 0.3045,
|
10370 |
+
"rewards/accuracies": 0.65625,
|
10371 |
+
"rewards/chosen": -0.35708463191986084,
|
10372 |
+
"rewards/margins": 4.310543060302734,
|
10373 |
+
"rewards/rejected": -4.667627811431885,
|
10374 |
+
"step": 1382
|
10375 |
+
},
|
10376 |
+
{
|
10377 |
+
"epoch": 1.7220623687689556,
|
10378 |
+
"grad_norm": 12.090971946716309,
|
10379 |
+
"learning_rate": 1.0532951891523123e-08,
|
10380 |
+
"logits/chosen": -10.829472541809082,
|
10381 |
+
"logits/rejected": -10.826078414916992,
|
10382 |
+
"logps/chosen": -26.86117172241211,
|
10383 |
+
"logps/rejected": -62.79159927368164,
|
10384 |
+
"loss": 0.2627,
|
10385 |
+
"rewards/accuracies": 0.71875,
|
10386 |
+
"rewards/chosen": -0.7374318838119507,
|
10387 |
+
"rewards/margins": 3.4001479148864746,
|
10388 |
+
"rewards/rejected": -4.137579917907715,
|
10389 |
+
"step": 1384
|
10390 |
+
},
|
10391 |
+
{
|
10392 |
+
"epoch": 1.7245508982035929,
|
10393 |
+
"grad_norm": 129.79551696777344,
|
10394 |
+
"learning_rate": 1.0347352162028088e-08,
|
10395 |
+
"logits/chosen": -10.817343711853027,
|
10396 |
+
"logits/rejected": -10.824586868286133,
|
10397 |
+
"logps/chosen": -23.552154541015625,
|
10398 |
+
"logps/rejected": -61.207462310791016,
|
10399 |
+
"loss": 0.3297,
|
10400 |
+
"rewards/accuracies": 0.65625,
|
10401 |
+
"rewards/chosen": -0.3887484073638916,
|
10402 |
+
"rewards/margins": 3.627763271331787,
|
10403 |
+
"rewards/rejected": -4.0165114402771,
|
10404 |
+
"step": 1386
|
10405 |
+
},
|
10406 |
+
{
|
10407 |
+
"epoch": 1.7270394276382302,
|
10408 |
+
"grad_norm": 3.878584146499634,
|
10409 |
+
"learning_rate": 1.0163312963114035e-08,
|
10410 |
+
"logits/chosen": -10.860746383666992,
|
10411 |
+
"logits/rejected": -10.86077880859375,
|
10412 |
+
"logps/chosen": -28.208192825317383,
|
10413 |
+
"logps/rejected": -57.151390075683594,
|
10414 |
+
"loss": 0.3755,
|
10415 |
+
"rewards/accuracies": 0.6875,
|
10416 |
+
"rewards/chosen": -0.8361051082611084,
|
10417 |
+
"rewards/margins": 2.7554969787597656,
|
10418 |
+
"rewards/rejected": -3.591602087020874,
|
10419 |
+
"step": 1388
|
10420 |
+
},
|
10421 |
+
{
|
10422 |
+
"epoch": 1.7295279570728672,
|
10423 |
+
"grad_norm": 2.7282052040100098,
|
10424 |
+
"learning_rate": 9.980837498242357e-09,
|
10425 |
+
"logits/chosen": -10.849210739135742,
|
10426 |
+
"logits/rejected": -10.844944953918457,
|
10427 |
+
"logps/chosen": -26.760292053222656,
|
10428 |
+
"logps/rejected": -63.53279495239258,
|
10429 |
+
"loss": 0.2893,
|
10430 |
+
"rewards/accuracies": 0.78125,
|
10431 |
+
"rewards/chosen": -0.6709414720535278,
|
10432 |
+
"rewards/margins": 3.6953835487365723,
|
10433 |
+
"rewards/rejected": -4.366325378417969,
|
10434 |
+
"step": 1390
|
10435 |
+
},
|
10436 |
+
{
|
10437 |
+
"epoch": 1.7320164865075045,
|
10438 |
+
"grad_norm": 13.552352905273438,
|
10439 |
+
"learning_rate": 9.799928943655488e-09,
|
10440 |
+
"logits/chosen": -10.841174125671387,
|
10441 |
+
"logits/rejected": -10.840930938720703,
|
10442 |
+
"logps/chosen": -22.428604125976562,
|
10443 |
+
"logps/rejected": -58.52720642089844,
|
10444 |
+
"loss": 0.297,
|
10445 |
+
"rewards/accuracies": 0.75,
|
10446 |
+
"rewards/chosen": -0.2888602912425995,
|
10447 |
+
"rewards/margins": 3.4409823417663574,
|
10448 |
+
"rewards/rejected": -3.7298426628112793,
|
10449 |
+
"step": 1392
|
10450 |
+
},
|
10451 |
+
{
|
10452 |
+
"epoch": 1.7345050159421418,
|
10453 |
+
"grad_norm": 5.00348424911499,
|
10454 |
+
"learning_rate": 9.620590448321553e-09,
|
10455 |
+
"logits/chosen": -10.81922435760498,
|
10456 |
+
"logits/rejected": -10.820487976074219,
|
10457 |
+
"logps/chosen": -29.074127197265625,
|
10458 |
+
"logps/rejected": -61.92654800415039,
|
10459 |
+
"loss": 0.3292,
|
10460 |
+
"rewards/accuracies": 0.6875,
|
10461 |
+
"rewards/chosen": -0.9312417507171631,
|
10462 |
+
"rewards/margins": 3.181020498275757,
|
10463 |
+
"rewards/rejected": -4.11226224899292,
|
10464 |
+
"step": 1394
|
10465 |
+
},
|
10466 |
+
{
|
10467 |
+
"epoch": 1.7369935453767789,
|
10468 |
+
"grad_norm": 8.92225456237793,
|
10469 |
+
"learning_rate": 9.442825133879607e-09,
|
10470 |
+
"logits/chosen": -10.817237854003906,
|
10471 |
+
"logits/rejected": -10.821587562561035,
|
10472 |
+
"logps/chosen": -28.017192840576172,
|
10473 |
+
"logps/rejected": -59.26190185546875,
|
10474 |
+
"loss": 0.3484,
|
10475 |
+
"rewards/accuracies": 0.59375,
|
10476 |
+
"rewards/chosen": -0.82759690284729,
|
10477 |
+
"rewards/margins": 3.0587477684020996,
|
10478 |
+
"rewards/rejected": -3.8863444328308105,
|
10479 |
+
"step": 1396
|
10480 |
+
},
|
10481 |
+
{
|
10482 |
+
"epoch": 1.7394820748114161,
|
10483 |
+
"grad_norm": 14.537239074707031,
|
10484 |
+
"learning_rate": 9.2666360945853e-09,
|
10485 |
+
"logits/chosen": -10.792232513427734,
|
10486 |
+
"logits/rejected": -10.797674179077148,
|
10487 |
+
"logps/chosen": -28.772098541259766,
|
10488 |
+
"logps/rejected": -68.55633544921875,
|
10489 |
+
"loss": 0.3414,
|
10490 |
+
"rewards/accuracies": 0.71875,
|
10491 |
+
"rewards/chosen": -0.8768026828765869,
|
10492 |
+
"rewards/margins": 3.842247486114502,
|
10493 |
+
"rewards/rejected": -4.719050407409668,
|
10494 |
+
"step": 1398
|
10495 |
+
},
|
10496 |
+
{
|
10497 |
+
"epoch": 1.7419706042460534,
|
10498 |
+
"grad_norm": 7.152594566345215,
|
10499 |
+
"learning_rate": 9.092026397256913e-09,
|
10500 |
+
"logits/chosen": -10.825244903564453,
|
10501 |
+
"logits/rejected": -10.82767105102539,
|
10502 |
+
"logps/chosen": -19.60363006591797,
|
10503 |
+
"logps/rejected": -46.500396728515625,
|
10504 |
+
"loss": 0.3422,
|
10505 |
+
"rewards/accuracies": 0.71875,
|
10506 |
+
"rewards/chosen": -0.033207476139068604,
|
10507 |
+
"rewards/margins": 2.5526838302612305,
|
10508 |
+
"rewards/rejected": -2.5858914852142334,
|
10509 |
+
"step": 1400
|
10510 |
+
},
|
10511 |
+
{
|
10512 |
+
"epoch": 1.7444591336806905,
|
10513 |
+
"grad_norm": 10.673386573791504,
|
10514 |
+
"learning_rate": 8.918999081222156e-09,
|
10515 |
+
"logits/chosen": -10.80329418182373,
|
10516 |
+
"logits/rejected": -10.805293083190918,
|
10517 |
+
"logps/chosen": -24.16779899597168,
|
10518 |
+
"logps/rejected": -61.346763610839844,
|
10519 |
+
"loss": 0.2918,
|
10520 |
+
"rewards/accuracies": 0.78125,
|
10521 |
+
"rewards/chosen": -0.43302100896835327,
|
10522 |
+
"rewards/margins": 3.563382625579834,
|
10523 |
+
"rewards/rejected": -3.996403694152832,
|
10524 |
+
"step": 1402
|
10525 |
+
},
|
10526 |
+
{
|
10527 |
+
"epoch": 1.7469476631153278,
|
10528 |
+
"grad_norm": 1.928539514541626,
|
10529 |
+
"learning_rate": 8.747557158265073e-09,
|
10530 |
+
"logits/chosen": -10.844294548034668,
|
10531 |
+
"logits/rejected": -10.835131645202637,
|
10532 |
+
"logps/chosen": -25.480384826660156,
|
10533 |
+
"logps/rejected": -57.03585433959961,
|
10534 |
+
"loss": 0.3263,
|
10535 |
+
"rewards/accuracies": 0.75,
|
10536 |
+
"rewards/chosen": -0.5597760677337646,
|
10537 |
+
"rewards/margins": 2.9487338066101074,
|
10538 |
+
"rewards/rejected": -3.508509874343872,
|
10539 |
+
"step": 1404
|
10540 |
+
},
|
10541 |
+
{
|
10542 |
+
"epoch": 1.749436192549965,
|
10543 |
+
"grad_norm": 12.497711181640625,
|
10544 |
+
"learning_rate": 8.577703612573783e-09,
|
10545 |
+
"logits/chosen": -10.82893180847168,
|
10546 |
+
"logits/rejected": -10.83089828491211,
|
10547 |
+
"logps/chosen": -24.069276809692383,
|
10548 |
+
"logps/rejected": -66.67253112792969,
|
10549 |
+
"loss": 0.3401,
|
10550 |
+
"rewards/accuracies": 0.6875,
|
10551 |
+
"rewards/chosen": -0.4053136110305786,
|
10552 |
+
"rewards/margins": 4.1401472091674805,
|
10553 |
+
"rewards/rejected": -4.545460224151611,
|
10554 |
+
"step": 1406
|
10555 |
+
},
|
10556 |
+
{
|
10557 |
+
"epoch": 1.7519247219846021,
|
10558 |
+
"grad_norm": 22.551191329956055,
|
10559 |
+
"learning_rate": 8.409441400688399e-09,
|
10560 |
+
"logits/chosen": -10.837417602539062,
|
10561 |
+
"logits/rejected": -10.837981224060059,
|
10562 |
+
"logps/chosen": -21.41533660888672,
|
10563 |
+
"logps/rejected": -61.951133728027344,
|
10564 |
+
"loss": 0.3606,
|
10565 |
+
"rewards/accuracies": 0.75,
|
10566 |
+
"rewards/chosen": -0.19989776611328125,
|
10567 |
+
"rewards/margins": 3.897480010986328,
|
10568 |
+
"rewards/rejected": -4.097377777099609,
|
10569 |
+
"step": 1408
|
10570 |
+
},
|
10571 |
+
{
|
10572 |
+
"epoch": 1.7544132514192394,
|
10573 |
+
"grad_norm": 28.929887771606445,
|
10574 |
+
"learning_rate": 8.24277345144967e-09,
|
10575 |
+
"logits/chosen": -10.846136093139648,
|
10576 |
+
"logits/rejected": -10.846277236938477,
|
10577 |
+
"logps/chosen": -26.540502548217773,
|
10578 |
+
"logps/rejected": -40.02751159667969,
|
10579 |
+
"loss": 0.3718,
|
10580 |
+
"rewards/accuracies": 0.5,
|
10581 |
+
"rewards/chosen": -0.6883711814880371,
|
10582 |
+
"rewards/margins": 1.336569905281067,
|
10583 |
+
"rewards/rejected": -2.0249409675598145,
|
10584 |
+
"step": 1410
|
10585 |
+
},
|
10586 |
+
{
|
10587 |
+
"epoch": 1.7569017808538767,
|
10588 |
+
"grad_norm": 8.628479957580566,
|
10589 |
+
"learning_rate": 8.077702665947973e-09,
|
10590 |
+
"logits/chosen": -10.839735984802246,
|
10591 |
+
"logits/rejected": -10.83474063873291,
|
10592 |
+
"logps/chosen": -22.964393615722656,
|
10593 |
+
"logps/rejected": -69.26115417480469,
|
10594 |
+
"loss": 0.3341,
|
10595 |
+
"rewards/accuracies": 0.75,
|
10596 |
+
"rewards/chosen": -0.2930600941181183,
|
10597 |
+
"rewards/margins": 4.422696590423584,
|
10598 |
+
"rewards/rejected": -4.715756416320801,
|
10599 |
+
"step": 1412
|
10600 |
+
},
|
10601 |
+
{
|
10602 |
+
"epoch": 1.7593903102885138,
|
10603 |
+
"grad_norm": 28.3585205078125,
|
10604 |
+
"learning_rate": 7.914231917472746e-09,
|
10605 |
+
"logits/chosen": -10.830848693847656,
|
10606 |
+
"logits/rejected": -10.833673477172852,
|
10607 |
+
"logps/chosen": -23.10784149169922,
|
10608 |
+
"logps/rejected": -69.79341125488281,
|
10609 |
+
"loss": 0.3478,
|
10610 |
+
"rewards/accuracies": 0.75,
|
10611 |
+
"rewards/chosen": -0.35468006134033203,
|
10612 |
+
"rewards/margins": 4.51253604888916,
|
10613 |
+
"rewards/rejected": -4.867216110229492,
|
10614 |
+
"step": 1414
|
10615 |
+
},
|
10616 |
+
{
|
10617 |
+
"epoch": 1.761878839723151,
|
10618 |
+
"grad_norm": 8.422154426574707,
|
10619 |
+
"learning_rate": 7.75236405146258e-09,
|
10620 |
+
"logits/chosen": -10.832451820373535,
|
10621 |
+
"logits/rejected": -10.834232330322266,
|
10622 |
+
"logps/chosen": -28.385543823242188,
|
10623 |
+
"logps/rejected": -70.90185546875,
|
10624 |
+
"loss": 0.3218,
|
10625 |
+
"rewards/accuracies": 0.65625,
|
10626 |
+
"rewards/chosen": -0.7882261276245117,
|
10627 |
+
"rewards/margins": 4.108382701873779,
|
10628 |
+
"rewards/rejected": -4.896608829498291,
|
10629 |
+
"step": 1416
|
10630 |
+
},
|
10631 |
+
{
|
10632 |
+
"epoch": 1.7643673691577884,
|
10633 |
+
"grad_norm": 29.272140502929688,
|
10634 |
+
"learning_rate": 7.592101885455593e-09,
|
10635 |
+
"logits/chosen": -10.832554817199707,
|
10636 |
+
"logits/rejected": -10.830697059631348,
|
10637 |
+
"logps/chosen": -25.028911590576172,
|
10638 |
+
"logps/rejected": -58.800750732421875,
|
10639 |
+
"loss": 0.3287,
|
10640 |
+
"rewards/accuracies": 0.65625,
|
10641 |
+
"rewards/chosen": -0.5799860954284668,
|
10642 |
+
"rewards/margins": 3.158944606781006,
|
10643 |
+
"rewards/rejected": -3.7389309406280518,
|
10644 |
+
"step": 1418
|
10645 |
+
},
|
10646 |
+
{
|
10647 |
+
"epoch": 1.7668558985924254,
|
10648 |
+
"grad_norm": 63.721107482910156,
|
10649 |
+
"learning_rate": 7.4334482090404935e-09,
|
10650 |
+
"logits/chosen": -10.820099830627441,
|
10651 |
+
"logits/rejected": -10.814040184020996,
|
10652 |
+
"logps/chosen": -25.835189819335938,
|
10653 |
+
"logps/rejected": -54.49407958984375,
|
10654 |
+
"loss": 0.3232,
|
10655 |
+
"rewards/accuracies": 0.65625,
|
10656 |
+
"rewards/chosen": -0.6330866813659668,
|
10657 |
+
"rewards/margins": 2.6440882682800293,
|
10658 |
+
"rewards/rejected": -3.2771754264831543,
|
10659 |
+
"step": 1420
|
10660 |
+
},
|
10661 |
+
{
|
10662 |
+
"epoch": 1.7693444280270627,
|
10663 |
+
"grad_norm": 7.655338764190674,
|
10664 |
+
"learning_rate": 7.276405783807893e-09,
|
10665 |
+
"logits/chosen": -10.817934036254883,
|
10666 |
+
"logits/rejected": -10.813355445861816,
|
10667 |
+
"logps/chosen": -23.41266632080078,
|
10668 |
+
"logps/rejected": -64.01387786865234,
|
10669 |
+
"loss": 0.3305,
|
10670 |
+
"rewards/accuracies": 0.6875,
|
10671 |
+
"rewards/chosen": -0.35830530524253845,
|
10672 |
+
"rewards/margins": 3.9317524433135986,
|
10673 |
+
"rewards/rejected": -4.29005765914917,
|
10674 |
+
"step": 1422
|
10675 |
+
},
|
10676 |
+
{
|
10677 |
+
"epoch": 1.7718329574617,
|
10678 |
+
"grad_norm": 16.495460510253906,
|
10679 |
+
"learning_rate": 7.120977343302359e-09,
|
10680 |
+
"logits/chosen": -10.812400817871094,
|
10681 |
+
"logits/rejected": -10.806700706481934,
|
10682 |
+
"logps/chosen": -26.264169692993164,
|
10683 |
+
"logps/rejected": -59.75008010864258,
|
10684 |
+
"loss": 0.316,
|
10685 |
+
"rewards/accuracies": 0.6875,
|
10686 |
+
"rewards/chosen": -0.6269797682762146,
|
10687 |
+
"rewards/margins": 3.2161946296691895,
|
10688 |
+
"rewards/rejected": -3.8431742191314697,
|
10689 |
+
"step": 1424
|
10690 |
+
},
|
10691 |
+
{
|
10692 |
+
"epoch": 1.774321486896337,
|
10693 |
+
"grad_norm": 52.430747985839844,
|
10694 |
+
"learning_rate": 6.9671655929747884e-09,
|
10695 |
+
"logits/chosen": -10.831871032714844,
|
10696 |
+
"logits/rejected": -10.833426475524902,
|
10697 |
+
"logps/chosen": -24.449472427368164,
|
10698 |
+
"logps/rejected": -45.51153564453125,
|
10699 |
+
"loss": 0.3439,
|
10700 |
+
"rewards/accuracies": 0.5,
|
10701 |
+
"rewards/chosen": -0.46670615673065186,
|
10702 |
+
"rewards/margins": 2.025547981262207,
|
10703 |
+
"rewards/rejected": -2.4922542572021484,
|
10704 |
+
"step": 1426
|
10705 |
+
},
|
10706 |
+
{
|
10707 |
+
"epoch": 1.7768100163309746,
|
10708 |
+
"grad_norm": 1.0906507968902588,
|
10709 |
+
"learning_rate": 6.814973210135255e-09,
|
10710 |
+
"logits/chosen": -10.85045051574707,
|
10711 |
+
"logits/rejected": -10.853729248046875,
|
10712 |
+
"logps/chosen": -23.105697631835938,
|
10713 |
+
"logps/rejected": -72.16358184814453,
|
10714 |
+
"loss": 0.285,
|
10715 |
+
"rewards/accuracies": 0.8125,
|
10716 |
+
"rewards/chosen": -0.36262983083724976,
|
10717 |
+
"rewards/margins": 4.786954879760742,
|
10718 |
+
"rewards/rejected": -5.149584770202637,
|
10719 |
+
"step": 1428
|
10720 |
+
},
|
10721 |
+
{
|
10722 |
+
"epoch": 1.7792985457656116,
|
10723 |
+
"grad_norm": 4.459844589233398,
|
10724 |
+
"learning_rate": 6.664402843906514e-09,
|
10725 |
+
"logits/chosen": -10.799229621887207,
|
10726 |
+
"logits/rejected": -10.7999267578125,
|
10727 |
+
"logps/chosen": -31.46733283996582,
|
10728 |
+
"logps/rejected": -65.57007598876953,
|
10729 |
+
"loss": 0.299,
|
10730 |
+
"rewards/accuracies": 0.78125,
|
10731 |
+
"rewards/chosen": -1.1061089038848877,
|
10732 |
+
"rewards/margins": 3.301386833190918,
|
10733 |
+
"rewards/rejected": -4.407495975494385,
|
10734 |
+
"step": 1430
|
10735 |
+
},
|
10736 |
+
{
|
10737 |
+
"epoch": 1.7817870752002487,
|
10738 |
+
"grad_norm": 15.899592399597168,
|
10739 |
+
"learning_rate": 6.515457115177802e-09,
|
10740 |
+
"logits/chosen": -10.796611785888672,
|
10741 |
+
"logits/rejected": -10.7985258102417,
|
10742 |
+
"logps/chosen": -28.31963539123535,
|
10743 |
+
"logps/rejected": -66.415771484375,
|
10744 |
+
"loss": 0.2964,
|
10745 |
+
"rewards/accuracies": 0.625,
|
10746 |
+
"rewards/chosen": -0.8590655326843262,
|
10747 |
+
"rewards/margins": 3.653881549835205,
|
10748 |
+
"rewards/rejected": -4.512947082519531,
|
10749 |
+
"step": 1432
|
10750 |
+
},
|
10751 |
+
{
|
10752 |
+
"epoch": 1.7842756046348862,
|
10753 |
+
"grad_norm": 19.590593338012695,
|
10754 |
+
"learning_rate": 6.368138616559282e-09,
|
10755 |
+
"logits/chosen": -10.814291000366211,
|
10756 |
+
"logits/rejected": -10.816354751586914,
|
10757 |
+
"logps/chosen": -27.344654083251953,
|
10758 |
+
"logps/rejected": -57.43811798095703,
|
10759 |
+
"loss": 0.3212,
|
10760 |
+
"rewards/accuracies": 0.75,
|
10761 |
+
"rewards/chosen": -0.7679904103279114,
|
10762 |
+
"rewards/margins": 2.95462703704834,
|
10763 |
+
"rewards/rejected": -3.7226176261901855,
|
10764 |
+
"step": 1434
|
10765 |
+
},
|
10766 |
+
{
|
10767 |
+
"epoch": 1.7867641340695233,
|
10768 |
+
"grad_norm": 114.24454498291016,
|
10769 |
+
"learning_rate": 6.2224499123368576e-09,
|
10770 |
+
"logits/chosen": -10.82932186126709,
|
10771 |
+
"logits/rejected": -10.82844066619873,
|
10772 |
+
"logps/chosen": -28.379507064819336,
|
10773 |
+
"logps/rejected": -47.598514556884766,
|
10774 |
+
"loss": 0.4161,
|
10775 |
+
"rewards/accuracies": 0.6875,
|
10776 |
+
"rewards/chosen": -0.8367791175842285,
|
10777 |
+
"rewards/margins": 1.8975491523742676,
|
10778 |
+
"rewards/rejected": -2.734328269958496,
|
10779 |
+
"step": 1436
|
10780 |
+
},
|
10781 |
+
{
|
10782 |
+
"epoch": 1.7892526635041603,
|
10783 |
+
"grad_norm": 6.041500091552734,
|
10784 |
+
"learning_rate": 6.078393538427573e-09,
|
10785 |
+
"logits/chosen": -10.825316429138184,
|
10786 |
+
"logits/rejected": -10.831624031066895,
|
10787 |
+
"logps/chosen": -28.40496063232422,
|
10788 |
+
"logps/rejected": -74.67303466796875,
|
10789 |
+
"loss": 0.3551,
|
10790 |
+
"rewards/accuracies": 0.65625,
|
10791 |
+
"rewards/chosen": -0.8339643478393555,
|
10792 |
+
"rewards/margins": 4.416720390319824,
|
10793 |
+
"rewards/rejected": -5.25068473815918,
|
10794 |
+
"step": 1438
|
10795 |
+
},
|
10796 |
+
{
|
10797 |
+
"epoch": 1.7917411929387979,
|
10798 |
+
"grad_norm": 5.8538737297058105,
|
10799 |
+
"learning_rate": 5.93597200233551e-09,
|
10800 |
+
"logits/chosen": -10.814746856689453,
|
10801 |
+
"logits/rejected": -10.814842224121094,
|
10802 |
+
"logps/chosen": -25.401987075805664,
|
10803 |
+
"logps/rejected": -66.26980590820312,
|
10804 |
+
"loss": 0.2539,
|
10805 |
+
"rewards/accuracies": 0.84375,
|
10806 |
+
"rewards/chosen": -0.5636205673217773,
|
10807 |
+
"rewards/margins": 3.9558775424957275,
|
10808 |
+
"rewards/rejected": -4.519497871398926,
|
10809 |
+
"step": 1440
|
10810 |
+
},
|
10811 |
+
{
|
10812 |
+
"epoch": 1.794229722373435,
|
10813 |
+
"grad_norm": 32.84010314941406,
|
10814 |
+
"learning_rate": 5.795187783108002e-09,
|
10815 |
+
"logits/chosen": -10.800264358520508,
|
10816 |
+
"logits/rejected": -10.800673484802246,
|
10817 |
+
"logps/chosen": -29.123065948486328,
|
10818 |
+
"logps/rejected": -67.73751831054688,
|
10819 |
+
"loss": 0.3369,
|
10820 |
+
"rewards/accuracies": 0.6875,
|
10821 |
+
"rewards/chosen": -0.9221575260162354,
|
10822 |
+
"rewards/margins": 3.7697463035583496,
|
10823 |
+
"rewards/rejected": -4.691904067993164,
|
10824 |
+
"step": 1442
|
10825 |
+
},
|
10826 |
+
{
|
10827 |
+
"epoch": 1.7967182518080722,
|
10828 |
+
"grad_norm": 7.772801399230957,
|
10829 |
+
"learning_rate": 5.656043331292681e-09,
|
10830 |
+
"logits/chosen": -10.818206787109375,
|
10831 |
+
"logits/rejected": -10.823054313659668,
|
10832 |
+
"logps/chosen": -23.672134399414062,
|
10833 |
+
"logps/rejected": -51.89259719848633,
|
10834 |
+
"loss": 0.3593,
|
10835 |
+
"rewards/accuracies": 0.71875,
|
10836 |
+
"rewards/chosen": -0.4020726680755615,
|
10837 |
+
"rewards/margins": 2.701967716217041,
|
10838 |
+
"rewards/rejected": -3.1040403842926025,
|
10839 |
+
"step": 1444
|
10840 |
+
},
|
10841 |
+
{
|
10842 |
+
"epoch": 1.7992067812427095,
|
10843 |
+
"grad_norm": 6.961812973022461,
|
10844 |
+
"learning_rate": 5.518541068894622e-09,
|
10845 |
+
"logits/chosen": -10.823866844177246,
|
10846 |
+
"logits/rejected": -10.824728012084961,
|
10847 |
+
"logps/chosen": -33.00081253051758,
|
10848 |
+
"logps/rejected": -69.7044906616211,
|
10849 |
+
"loss": 0.3134,
|
10850 |
+
"rewards/accuracies": 0.78125,
|
10851 |
+
"rewards/chosen": -1.3416486978530884,
|
10852 |
+
"rewards/margins": 3.5815513134002686,
|
10853 |
+
"rewards/rejected": -4.9232001304626465,
|
10854 |
+
"step": 1446
|
10855 |
+
},
|
10856 |
+
{
|
10857 |
+
"epoch": 1.8016953106773466,
|
10858 |
+
"grad_norm": 31.4688720703125,
|
10859 |
+
"learning_rate": 5.382683389334375e-09,
|
10860 |
+
"logits/chosen": -10.822563171386719,
|
10861 |
+
"logits/rejected": -10.822004318237305,
|
10862 |
+
"logps/chosen": -21.888166427612305,
|
10863 |
+
"logps/rejected": -52.723751068115234,
|
10864 |
+
"loss": 0.339,
|
10865 |
+
"rewards/accuracies": 0.59375,
|
10866 |
+
"rewards/chosen": -0.21833878755569458,
|
10867 |
+
"rewards/margins": 2.944748640060425,
|
10868 |
+
"rewards/rejected": -3.1630876064300537,
|
10869 |
+
"step": 1448
|
10870 |
+
},
|
10871 |
+
{
|
10872 |
+
"epoch": 1.8041838401119838,
|
10873 |
+
"grad_norm": 14.762541770935059,
|
10874 |
+
"learning_rate": 5.248472657406122e-09,
|
10875 |
+
"logits/chosen": -10.821406364440918,
|
10876 |
+
"logits/rejected": -10.816853523254395,
|
10877 |
+
"logps/chosen": -28.940649032592773,
|
10878 |
+
"logps/rejected": -57.142417907714844,
|
10879 |
+
"loss": 0.3214,
|
10880 |
+
"rewards/accuracies": 0.6875,
|
10881 |
+
"rewards/chosen": -0.9556154608726501,
|
10882 |
+
"rewards/margins": 2.6828153133392334,
|
10883 |
+
"rewards/rejected": -3.638430595397949,
|
10884 |
+
"step": 1450
|
10885 |
+
},
|
10886 |
+
{
|
10887 |
+
"epoch": 1.8066723695466211,
|
10888 |
+
"grad_norm": 11.76990032196045,
|
10889 |
+
"learning_rate": 5.1159112092366676e-09,
|
10890 |
+
"logits/chosen": -10.837207794189453,
|
10891 |
+
"logits/rejected": -10.829712867736816,
|
10892 |
+
"logps/chosen": -29.688844680786133,
|
10893 |
+
"logps/rejected": -66.00604248046875,
|
10894 |
+
"loss": 0.3339,
|
10895 |
+
"rewards/accuracies": 0.65625,
|
10896 |
+
"rewards/chosen": -0.9291807413101196,
|
10897 |
+
"rewards/margins": 3.522550106048584,
|
10898 |
+
"rewards/rejected": -4.451730728149414,
|
10899 |
+
"step": 1452
|
10900 |
+
},
|
10901 |
+
{
|
10902 |
+
"epoch": 1.8091608989812582,
|
10903 |
+
"grad_norm": 2.2030696868896484,
|
10904 |
+
"learning_rate": 4.985001352244666e-09,
|
10905 |
+
"logits/chosen": -10.811424255371094,
|
10906 |
+
"logits/rejected": -10.815189361572266,
|
10907 |
+
"logps/chosen": -24.620361328125,
|
10908 |
+
"logps/rejected": -64.42039489746094,
|
10909 |
+
"loss": 0.3568,
|
10910 |
+
"rewards/accuracies": 0.65625,
|
10911 |
+
"rewards/chosen": -0.49651896953582764,
|
10912 |
+
"rewards/margins": 3.8992347717285156,
|
10913 |
+
"rewards/rejected": -4.395754337310791,
|
10914 |
+
"step": 1454
|
10915 |
+
},
|
10916 |
+
{
|
10917 |
+
"epoch": 1.8116494284158955,
|
10918 |
+
"grad_norm": 32.40312194824219,
|
10919 |
+
"learning_rate": 4.855745365100538e-09,
|
10920 |
+
"logits/chosen": -10.825475692749023,
|
10921 |
+
"logits/rejected": -10.827003479003906,
|
10922 |
+
"logps/chosen": -33.1482048034668,
|
10923 |
+
"logps/rejected": -60.821807861328125,
|
10924 |
+
"loss": 0.3544,
|
10925 |
+
"rewards/accuracies": 0.71875,
|
10926 |
+
"rewards/chosen": -1.3415831327438354,
|
10927 |
+
"rewards/margins": 2.7061212062835693,
|
10928 |
+
"rewards/rejected": -4.047704696655273,
|
10929 |
+
"step": 1456
|
10930 |
+
},
|
10931 |
+
{
|
10932 |
+
"epoch": 1.8141379578505328,
|
10933 |
+
"grad_norm": 5.195944786071777,
|
10934 |
+
"learning_rate": 4.728145497686753e-09,
|
10935 |
+
"logits/chosen": -10.808531761169434,
|
10936 |
+
"logits/rejected": -10.80744743347168,
|
10937 |
+
"logps/chosen": -27.157238006591797,
|
10938 |
+
"logps/rejected": -56.74330139160156,
|
10939 |
+
"loss": 0.3454,
|
10940 |
+
"rewards/accuracies": 0.6875,
|
10941 |
+
"rewards/chosen": -0.7516356706619263,
|
10942 |
+
"rewards/margins": 2.846492290496826,
|
10943 |
+
"rewards/rejected": -3.598127841949463,
|
10944 |
+
"step": 1458
|
10945 |
+
},
|
10946 |
+
{
|
10947 |
+
"epoch": 1.8166264872851698,
|
10948 |
+
"grad_norm": 3.704167366027832,
|
10949 |
+
"learning_rate": 4.60220397105866e-09,
|
10950 |
+
"logits/chosen": -10.799447059631348,
|
10951 |
+
"logits/rejected": -10.80561351776123,
|
10952 |
+
"logps/chosen": -26.86786651611328,
|
10953 |
+
"logps/rejected": -77.04949188232422,
|
10954 |
+
"loss": 0.2995,
|
10955 |
+
"rewards/accuracies": 0.75,
|
10956 |
+
"rewards/chosen": -0.7065700888633728,
|
10957 |
+
"rewards/margins": 4.82396125793457,
|
10958 |
+
"rewards/rejected": -5.530531406402588,
|
10959 |
+
"step": 1460
|
10960 |
+
},
|
10961 |
+
{
|
10962 |
+
"epoch": 1.8191150167198071,
|
10963 |
+
"grad_norm": 4.062532424926758,
|
10964 |
+
"learning_rate": 4.477922977405912e-09,
|
10965 |
+
"logits/chosen": -10.818702697753906,
|
10966 |
+
"logits/rejected": -10.819602966308594,
|
10967 |
+
"logps/chosen": -21.75606346130371,
|
10968 |
+
"logps/rejected": -57.006935119628906,
|
10969 |
+
"loss": 0.297,
|
10970 |
+
"rewards/accuracies": 0.6875,
|
10971 |
+
"rewards/chosen": -0.16213519871234894,
|
10972 |
+
"rewards/margins": 3.5094478130340576,
|
10973 |
+
"rewards/rejected": -3.6715829372406006,
|
10974 |
+
"step": 1462
|
10975 |
+
},
|
10976 |
+
{
|
10977 |
+
"epoch": 1.8216035461544444,
|
10978 |
+
"grad_norm": 24.149431228637695,
|
10979 |
+
"learning_rate": 4.355304680014171e-09,
|
10980 |
+
"logits/chosen": -10.831608772277832,
|
10981 |
+
"logits/rejected": -10.84537410736084,
|
10982 |
+
"logps/chosen": -24.60963249206543,
|
10983 |
+
"logps/rejected": -69.60423278808594,
|
10984 |
+
"loss": 0.3599,
|
10985 |
+
"rewards/accuracies": 0.59375,
|
10986 |
+
"rewards/chosen": -0.5038677453994751,
|
10987 |
+
"rewards/margins": 4.358825206756592,
|
10988 |
+
"rewards/rejected": -4.862692832946777,
|
10989 |
+
"step": 1464
|
10990 |
+
},
|
10991 |
+
{
|
10992 |
+
"epoch": 1.8240920755890815,
|
10993 |
+
"grad_norm": 5.768329620361328,
|
10994 |
+
"learning_rate": 4.234351213227605e-09,
|
10995 |
+
"logits/chosen": -10.844436645507812,
|
10996 |
+
"logits/rejected": -10.842813491821289,
|
10997 |
+
"logps/chosen": -22.979904174804688,
|
10998 |
+
"logps/rejected": -51.99818801879883,
|
10999 |
+
"loss": 0.331,
|
11000 |
+
"rewards/accuracies": 0.59375,
|
11001 |
+
"rewards/chosen": -0.32598400115966797,
|
11002 |
+
"rewards/margins": 2.7477188110351562,
|
11003 |
+
"rewards/rejected": -3.073702812194824,
|
11004 |
+
"step": 1466
|
11005 |
+
},
|
11006 |
+
{
|
11007 |
+
"epoch": 1.8265806050237188,
|
11008 |
+
"grad_norm": 7.735873222351074,
|
11009 |
+
"learning_rate": 4.1150646824116064e-09,
|
11010 |
+
"logits/chosen": -10.821906089782715,
|
11011 |
+
"logits/rejected": -10.81991195678711,
|
11012 |
+
"logps/chosen": -23.383838653564453,
|
11013 |
+
"logps/rejected": -50.20802688598633,
|
11014 |
+
"loss": 0.3584,
|
11015 |
+
"rewards/accuracies": 0.59375,
|
11016 |
+
"rewards/chosen": -0.32233789563179016,
|
11017 |
+
"rewards/margins": 2.645047426223755,
|
11018 |
+
"rewards/rejected": -2.9673855304718018,
|
11019 |
+
"step": 1468
|
11020 |
+
},
|
11021 |
+
{
|
11022 |
+
"epoch": 1.829069134458356,
|
11023 |
+
"grad_norm": 9.88779067993164,
|
11024 |
+
"learning_rate": 3.997447163916223e-09,
|
11025 |
+
"logits/chosen": -10.780189514160156,
|
11026 |
+
"logits/rejected": -10.784330368041992,
|
11027 |
+
"logps/chosen": -26.26874542236328,
|
11028 |
+
"logps/rejected": -64.21620178222656,
|
11029 |
+
"loss": 0.315,
|
11030 |
+
"rewards/accuracies": 0.625,
|
11031 |
+
"rewards/chosen": -0.6374044418334961,
|
11032 |
+
"rewards/margins": 3.6502692699432373,
|
11033 |
+
"rewards/rejected": -4.287673473358154,
|
11034 |
+
"step": 1470
|
11035 |
+
},
|
11036 |
+
{
|
11037 |
+
"epoch": 1.8315576638929931,
|
11038 |
+
"grad_norm": 3.3723583221435547,
|
11039 |
+
"learning_rate": 3.8815007050399975e-09,
|
11040 |
+
"logits/chosen": -10.845205307006836,
|
11041 |
+
"logits/rejected": -10.850592613220215,
|
11042 |
+
"logps/chosen": -24.638830184936523,
|
11043 |
+
"logps/rejected": -62.163787841796875,
|
11044 |
+
"loss": 0.3005,
|
11045 |
+
"rewards/accuracies": 0.6875,
|
11046 |
+
"rewards/chosen": -0.4939250349998474,
|
11047 |
+
"rewards/margins": 3.6708312034606934,
|
11048 |
+
"rewards/rejected": -4.1647562980651855,
|
11049 |
+
"step": 1472
|
11050 |
+
},
|
11051 |
+
{
|
11052 |
+
"epoch": 1.8340461933276304,
|
11053 |
+
"grad_norm": 2.0998055934906006,
|
11054 |
+
"learning_rate": 3.767227323994293e-09,
|
11055 |
+
"logits/chosen": -10.836874008178711,
|
11056 |
+
"logits/rejected": -10.8333740234375,
|
11057 |
+
"logps/chosen": -20.189462661743164,
|
11058 |
+
"logps/rejected": -55.19818878173828,
|
11059 |
+
"loss": 0.3569,
|
11060 |
+
"rewards/accuracies": 0.46875,
|
11061 |
+
"rewards/chosen": -0.0895194411277771,
|
11062 |
+
"rewards/margins": 3.2878525257110596,
|
11063 |
+
"rewards/rejected": -3.3773720264434814,
|
11064 |
+
"step": 1474
|
11065 |
+
},
|
11066 |
+
{
|
11067 |
+
"epoch": 1.8365347227622677,
|
11068 |
+
"grad_norm": 18.310754776000977,
|
11069 |
+
"learning_rate": 3.6546290098682485e-09,
|
11070 |
+
"logits/chosen": -10.863044738769531,
|
11071 |
+
"logits/rejected": -10.86330509185791,
|
11072 |
+
"logps/chosen": -24.017717361450195,
|
11073 |
+
"logps/rejected": -58.69614028930664,
|
11074 |
+
"loss": 0.3456,
|
11075 |
+
"rewards/accuracies": 0.6875,
|
11076 |
+
"rewards/chosen": -0.43135157227516174,
|
11077 |
+
"rewards/margins": 3.3879075050354004,
|
11078 |
+
"rewards/rejected": -3.819258689880371,
|
11079 |
+
"step": 1476
|
11080 |
+
},
|
11081 |
+
{
|
11082 |
+
"epoch": 1.8390232521969048,
|
11083 |
+
"grad_norm": 3.3576934337615967,
|
11084 |
+
"learning_rate": 3.543707722594069e-09,
|
11085 |
+
"logits/chosen": -10.862442016601562,
|
11086 |
+
"logits/rejected": -10.859800338745117,
|
11087 |
+
"logps/chosen": -27.191797256469727,
|
11088 |
+
"logps/rejected": -71.88859558105469,
|
11089 |
+
"loss": 0.3095,
|
11090 |
+
"rewards/accuracies": 0.6875,
|
11091 |
+
"rewards/chosen": -0.7576779723167419,
|
11092 |
+
"rewards/margins": 4.214293479919434,
|
11093 |
+
"rewards/rejected": -4.971971035003662,
|
11094 |
+
"step": 1478
|
11095 |
+
},
|
11096 |
+
{
|
11097 |
+
"epoch": 1.841511781631542,
|
11098 |
+
"grad_norm": 6.704062461853027,
|
11099 |
+
"learning_rate": 3.4344653929129554e-09,
|
11100 |
+
"logits/chosen": -10.799659729003906,
|
11101 |
+
"logits/rejected": -10.798733711242676,
|
11102 |
+
"logps/chosen": -19.196088790893555,
|
11103 |
+
"logps/rejected": -47.406768798828125,
|
11104 |
+
"loss": 0.3276,
|
11105 |
+
"rewards/accuracies": 0.6875,
|
11106 |
+
"rewards/chosen": 0.06628356873989105,
|
11107 |
+
"rewards/margins": 2.775508403778076,
|
11108 |
+
"rewards/rejected": -2.7092249393463135,
|
11109 |
+
"step": 1480
|
11110 |
+
},
|
11111 |
+
{
|
11112 |
+
"epoch": 1.8440003110661793,
|
11113 |
+
"grad_norm": 7.278040409088135,
|
11114 |
+
"learning_rate": 3.326903922341473e-09,
|
11115 |
+
"logits/chosen": -10.815013885498047,
|
11116 |
+
"logits/rejected": -10.810517311096191,
|
11117 |
+
"logps/chosen": -23.322364807128906,
|
11118 |
+
"logps/rejected": -60.02410888671875,
|
11119 |
+
"loss": 0.3409,
|
11120 |
+
"rewards/accuracies": 0.71875,
|
11121 |
+
"rewards/chosen": -0.3628728985786438,
|
11122 |
+
"rewards/margins": 3.500354290008545,
|
11123 |
+
"rewards/rejected": -3.863227367401123,
|
11124 |
+
"step": 1482
|
11125 |
+
},
|
11126 |
+
{
|
11127 |
+
"epoch": 1.8464888405008164,
|
11128 |
+
"grad_norm": 18.574892044067383,
|
11129 |
+
"learning_rate": 3.221025183138493e-09,
|
11130 |
+
"logits/chosen": -10.82407283782959,
|
11131 |
+
"logits/rejected": -10.823734283447266,
|
11132 |
+
"logps/chosen": -25.93548011779785,
|
11133 |
+
"logps/rejected": -61.550941467285156,
|
11134 |
+
"loss": 0.2756,
|
11135 |
+
"rewards/accuracies": 0.6875,
|
11136 |
+
"rewards/chosen": -0.6312334537506104,
|
11137 |
+
"rewards/margins": 3.374722957611084,
|
11138 |
+
"rewards/rejected": -4.005956649780273,
|
11139 |
+
"step": 1484
|
11140 |
+
},
|
11141 |
+
{
|
11142 |
+
"epoch": 1.848977369935454,
|
11143 |
+
"grad_norm": 28.313570022583008,
|
11144 |
+
"learning_rate": 3.116831018272581e-09,
|
11145 |
+
"logits/chosen": -10.880552291870117,
|
11146 |
+
"logits/rejected": -10.877769470214844,
|
11147 |
+
"logps/chosen": -22.949565887451172,
|
11148 |
+
"logps/rejected": -65.82709503173828,
|
11149 |
+
"loss": 0.2973,
|
11150 |
+
"rewards/accuracies": 0.78125,
|
11151 |
+
"rewards/chosen": -0.3210931420326233,
|
11152 |
+
"rewards/margins": 4.221843719482422,
|
11153 |
+
"rewards/rejected": -4.5429368019104,
|
11154 |
+
"step": 1486
|
11155 |
+
},
|
11156 |
+
{
|
11157 |
+
"epoch": 1.851465899370091,
|
11158 |
+
"grad_norm": 50.520294189453125,
|
11159 |
+
"learning_rate": 3.0143232413898602e-09,
|
11160 |
+
"logits/chosen": -10.797541618347168,
|
11161 |
+
"logits/rejected": -10.794352531433105,
|
11162 |
+
"logps/chosen": -28.862197875976562,
|
11163 |
+
"logps/rejected": -75.26273345947266,
|
11164 |
+
"loss": 0.2318,
|
11165 |
+
"rewards/accuracies": 0.8125,
|
11166 |
+
"rewards/chosen": -0.8763686418533325,
|
11167 |
+
"rewards/margins": 4.540627956390381,
|
11168 |
+
"rewards/rejected": -5.416996479034424,
|
11169 |
+
"step": 1488
|
11170 |
+
},
|
11171 |
+
{
|
11172 |
+
"epoch": 1.853954428804728,
|
11173 |
+
"grad_norm": 8.633639335632324,
|
11174 |
+
"learning_rate": 2.913503636782577e-09,
|
11175 |
+
"logits/chosen": -10.839570999145508,
|
11176 |
+
"logits/rejected": -10.838019371032715,
|
11177 |
+
"logps/chosen": -22.53410530090332,
|
11178 |
+
"logps/rejected": -46.19023513793945,
|
11179 |
+
"loss": 0.3611,
|
11180 |
+
"rewards/accuracies": 0.5,
|
11181 |
+
"rewards/chosen": -0.3035596013069153,
|
11182 |
+
"rewards/margins": 2.2712674140930176,
|
11183 |
+
"rewards/rejected": -2.574826955795288,
|
11184 |
+
"step": 1490
|
11185 |
+
},
|
11186 |
+
{
|
11187 |
+
"epoch": 1.8564429582393656,
|
11188 |
+
"grad_norm": 5.074777126312256,
|
11189 |
+
"learning_rate": 2.8143739593578853e-09,
|
11190 |
+
"logits/chosen": -10.803709030151367,
|
11191 |
+
"logits/rejected": -10.803434371948242,
|
11192 |
+
"logps/chosen": -23.481767654418945,
|
11193 |
+
"logps/rejected": -63.84934997558594,
|
11194 |
+
"loss": 0.3185,
|
11195 |
+
"rewards/accuracies": 0.65625,
|
11196 |
+
"rewards/chosen": -0.3606772720813751,
|
11197 |
+
"rewards/margins": 3.896145820617676,
|
11198 |
+
"rewards/rejected": -4.2568230628967285,
|
11199 |
+
"step": 1492
|
11200 |
+
},
|
11201 |
+
{
|
11202 |
+
"epoch": 1.8589314876740026,
|
11203 |
+
"grad_norm": 8.714546203613281,
|
11204 |
+
"learning_rate": 2.716935934607434e-09,
|
11205 |
+
"logits/chosen": -10.823909759521484,
|
11206 |
+
"logits/rejected": -10.820356369018555,
|
11207 |
+
"logps/chosen": -23.897640228271484,
|
11208 |
+
"logps/rejected": -52.00101089477539,
|
11209 |
+
"loss": 0.3194,
|
11210 |
+
"rewards/accuracies": 0.6875,
|
11211 |
+
"rewards/chosen": -0.44381412863731384,
|
11212 |
+
"rewards/margins": 2.6014151573181152,
|
11213 |
+
"rewards/rejected": -3.045229196548462,
|
11214 |
+
"step": 1494
|
11215 |
+
},
|
11216 |
+
{
|
11217 |
+
"epoch": 1.86142001710864,
|
11218 |
+
"grad_norm": 4.605140209197998,
|
11219 |
+
"learning_rate": 2.6211912585772377e-09,
|
11220 |
+
"logits/chosen": -10.820908546447754,
|
11221 |
+
"logits/rejected": -10.817206382751465,
|
11222 |
+
"logps/chosen": -22.388513565063477,
|
11223 |
+
"logps/rejected": -52.58741760253906,
|
11224 |
+
"loss": 0.3037,
|
11225 |
+
"rewards/accuracies": 0.59375,
|
11226 |
+
"rewards/chosen": -0.27936896681785583,
|
11227 |
+
"rewards/margins": 2.8748831748962402,
|
11228 |
+
"rewards/rejected": -3.154252052307129,
|
11229 |
+
"step": 1496
|
11230 |
+
},
|
11231 |
+
{
|
11232 |
+
"epoch": 1.8639085465432772,
|
11233 |
+
"grad_norm": 6.918066024780273,
|
11234 |
+
"learning_rate": 2.5271415978382116e-09,
|
11235 |
+
"logits/chosen": -10.764432907104492,
|
11236 |
+
"logits/rejected": -10.769204139709473,
|
11237 |
+
"logps/chosen": -21.751548767089844,
|
11238 |
+
"logps/rejected": -47.14215087890625,
|
11239 |
+
"loss": 0.3786,
|
11240 |
+
"rewards/accuracies": 0.65625,
|
11241 |
+
"rewards/chosen": -0.1537971943616867,
|
11242 |
+
"rewards/margins": 2.537064790725708,
|
11243 |
+
"rewards/rejected": -2.690861701965332,
|
11244 |
+
"step": 1498
|
11245 |
+
},
|
11246 |
+
{
|
11247 |
+
"epoch": 1.8663970759779143,
|
11248 |
+
"grad_norm": 12.986876487731934,
|
11249 |
+
"learning_rate": 2.4347885894571484e-09,
|
11250 |
+
"logits/chosen": -10.837455749511719,
|
11251 |
+
"logits/rejected": -10.839674949645996,
|
11252 |
+
"logps/chosen": -28.00076675415039,
|
11253 |
+
"logps/rejected": -66.6291275024414,
|
11254 |
+
"loss": 0.3554,
|
11255 |
+
"rewards/accuracies": 0.6875,
|
11256 |
+
"rewards/chosen": -0.8600501418113708,
|
11257 |
+
"rewards/margins": 3.6425986289978027,
|
11258 |
+
"rewards/rejected": -4.502648830413818,
|
11259 |
+
"step": 1500
|
11260 |
}
|
11261 |
],
|
11262 |
"logging_steps": 2,
|