WizardLM-Uncensored-Falcon-40b / trainer_state.json
ehartford's picture
Upload folder using huggingface_hub
814a2bd
raw
history blame
13.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9930151338766007,
"global_step": 214,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2e-05,
"loss": 0.8616,
"step": 2
},
{
"epoch": 0.04,
"learning_rate": 1.9995608365087945e-05,
"loss": 0.7122,
"step": 4
},
{
"epoch": 0.06,
"learning_rate": 1.9982437317643218e-05,
"loss": 0.6609,
"step": 6
},
{
"epoch": 0.07,
"learning_rate": 1.996049842615217e-05,
"loss": 0.6289,
"step": 8
},
{
"epoch": 0.09,
"learning_rate": 1.992981096013517e-05,
"loss": 0.6091,
"step": 10
},
{
"epoch": 0.11,
"learning_rate": 1.9890401873221642e-05,
"loss": 0.5863,
"step": 12
},
{
"epoch": 0.13,
"learning_rate": 1.984230577947597e-05,
"loss": 0.5834,
"step": 14
},
{
"epoch": 0.15,
"learning_rate": 1.9785564922995042e-05,
"loss": 0.5628,
"step": 16
},
{
"epoch": 0.17,
"learning_rate": 1.972022914080411e-05,
"loss": 0.5446,
"step": 18
},
{
"epoch": 0.19,
"learning_rate": 1.964635581908359e-05,
"loss": 0.5434,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 1.9564009842765225e-05,
"loss": 0.5196,
"step": 22
},
{
"epoch": 0.22,
"learning_rate": 1.9473263538541916e-05,
"loss": 0.5292,
"step": 24
},
{
"epoch": 0.24,
"learning_rate": 1.9374196611341212e-05,
"loss": 0.5139,
"step": 26
},
{
"epoch": 0.26,
"learning_rate": 1.9266896074318335e-05,
"loss": 0.5088,
"step": 28
},
{
"epoch": 0.28,
"learning_rate": 1.9151456172430186e-05,
"loss": 0.5079,
"step": 30
},
{
"epoch": 0.3,
"learning_rate": 1.9027978299657436e-05,
"loss": 0.5086,
"step": 32
},
{
"epoch": 0.32,
"learning_rate": 1.8896570909947477e-05,
"loss": 0.5024,
"step": 34
},
{
"epoch": 0.34,
"learning_rate": 1.875734942195637e-05,
"loss": 0.5046,
"step": 36
},
{
"epoch": 0.35,
"learning_rate": 1.8610436117673557e-05,
"loss": 0.4777,
"step": 38
},
{
"epoch": 0.37,
"learning_rate": 1.845596003501826e-05,
"loss": 0.5006,
"step": 40
},
{
"epoch": 0.39,
"learning_rate": 1.829405685450202e-05,
"loss": 0.4796,
"step": 42
},
{
"epoch": 0.41,
"learning_rate": 1.8124868780056814e-05,
"loss": 0.4701,
"step": 44
},
{
"epoch": 0.43,
"learning_rate": 1.7948544414133534e-05,
"loss": 0.4784,
"step": 46
},
{
"epoch": 0.45,
"learning_rate": 1.7765238627180424e-05,
"loss": 0.4773,
"step": 48
},
{
"epoch": 0.47,
"learning_rate": 1.7575112421616203e-05,
"loss": 0.4783,
"step": 50
},
{
"epoch": 0.48,
"learning_rate": 1.7378332790417275e-05,
"loss": 0.4772,
"step": 52
},
{
"epoch": 0.5,
"learning_rate": 1.717507257044331e-05,
"loss": 0.4671,
"step": 54
},
{
"epoch": 0.52,
"learning_rate": 1.6965510290629973e-05,
"loss": 0.4716,
"step": 56
},
{
"epoch": 0.54,
"learning_rate": 1.6749830015182106e-05,
"loss": 0.4539,
"step": 58
},
{
"epoch": 0.56,
"learning_rate": 1.6528221181905217e-05,
"loss": 0.4613,
"step": 60
},
{
"epoch": 0.58,
"learning_rate": 1.6300878435817115e-05,
"loss": 0.4758,
"step": 62
},
{
"epoch": 0.6,
"learning_rate": 1.6068001458185934e-05,
"loss": 0.4623,
"step": 64
},
{
"epoch": 0.61,
"learning_rate": 1.5829794791144723e-05,
"loss": 0.4734,
"step": 66
},
{
"epoch": 0.63,
"learning_rate": 1.5586467658036526e-05,
"loss": 0.4512,
"step": 68
},
{
"epoch": 0.65,
"learning_rate": 1.533823377964791e-05,
"loss": 0.4713,
"step": 70
},
{
"epoch": 0.67,
"learning_rate": 1.5085311186492206e-05,
"loss": 0.4789,
"step": 72
},
{
"epoch": 0.69,
"learning_rate": 1.482792202730745e-05,
"loss": 0.5663,
"step": 74
},
{
"epoch": 0.71,
"learning_rate": 1.4566292373937133e-05,
"loss": 0.4551,
"step": 76
},
{
"epoch": 0.73,
"learning_rate": 1.4300652022765207e-05,
"loss": 0.461,
"step": 78
},
{
"epoch": 0.75,
"learning_rate": 1.4031234292879726e-05,
"loss": 0.4673,
"step": 80
},
{
"epoch": 0.76,
"learning_rate": 1.3758275821142382e-05,
"loss": 0.4589,
"step": 82
},
{
"epoch": 0.78,
"learning_rate": 1.348201635434399e-05,
"loss": 0.4495,
"step": 84
},
{
"epoch": 0.8,
"learning_rate": 1.3202698538628376e-05,
"loss": 0.4645,
"step": 86
},
{
"epoch": 0.82,
"learning_rate": 1.292056770636976e-05,
"loss": 0.4555,
"step": 88
},
{
"epoch": 0.84,
"learning_rate": 1.2635871660690677e-05,
"loss": 0.4464,
"step": 90
},
{
"epoch": 0.86,
"learning_rate": 1.234886045780984e-05,
"loss": 0.4646,
"step": 92
},
{
"epoch": 0.88,
"learning_rate": 1.2059786187410984e-05,
"loss": 0.4599,
"step": 94
},
{
"epoch": 0.89,
"learning_rate": 1.176890275122573e-05,
"loss": 0.4534,
"step": 96
},
{
"epoch": 0.91,
"learning_rate": 1.1476465640024814e-05,
"loss": 0.4744,
"step": 98
},
{
"epoch": 0.93,
"learning_rate": 1.1182731709213658e-05,
"loss": 0.4626,
"step": 100
},
{
"epoch": 0.95,
"learning_rate": 1.0887958953229349e-05,
"loss": 0.4407,
"step": 102
},
{
"epoch": 0.97,
"learning_rate": 1.0592406278937143e-05,
"loss": 0.452,
"step": 104
},
{
"epoch": 0.99,
"learning_rate": 1.0296333278225599e-05,
"loss": 0.4496,
"step": 106
},
{
"epoch": 1.01,
"learning_rate": 1e-05,
"loss": 0.4138,
"step": 108
},
{
"epoch": 1.02,
"learning_rate": 9.703666721774403e-06,
"loss": 0.2924,
"step": 110
},
{
"epoch": 1.04,
"learning_rate": 9.407593721062858e-06,
"loss": 0.3184,
"step": 112
},
{
"epoch": 1.06,
"learning_rate": 9.112041046770653e-06,
"loss": 0.2885,
"step": 114
},
{
"epoch": 1.08,
"learning_rate": 8.817268290786343e-06,
"loss": 0.2842,
"step": 116
},
{
"epoch": 1.1,
"learning_rate": 8.52353435997519e-06,
"loss": 0.2763,
"step": 118
},
{
"epoch": 1.12,
"learning_rate": 8.231097248774273e-06,
"loss": 0.2765,
"step": 120
},
{
"epoch": 1.14,
"learning_rate": 7.940213812589018e-06,
"loss": 0.2788,
"step": 122
},
{
"epoch": 1.15,
"learning_rate": 7.651139542190164e-06,
"loss": 0.2821,
"step": 124
},
{
"epoch": 1.17,
"learning_rate": 7.364128339309326e-06,
"loss": 0.2834,
"step": 126
},
{
"epoch": 1.19,
"learning_rate": 7.079432293630244e-06,
"loss": 0.2728,
"step": 128
},
{
"epoch": 1.21,
"learning_rate": 6.797301461371626e-06,
"loss": 0.274,
"step": 130
},
{
"epoch": 1.23,
"learning_rate": 6.517983645656014e-06,
"loss": 0.276,
"step": 132
},
{
"epoch": 1.25,
"learning_rate": 6.241724178857621e-06,
"loss": 0.2723,
"step": 134
},
{
"epoch": 1.27,
"learning_rate": 5.96876570712028e-06,
"loss": 0.2876,
"step": 136
},
{
"epoch": 1.29,
"learning_rate": 5.699347977234799e-06,
"loss": 0.2714,
"step": 138
},
{
"epoch": 1.3,
"learning_rate": 5.43370762606287e-06,
"loss": 0.2758,
"step": 140
},
{
"epoch": 1.32,
"learning_rate": 5.172077972692553e-06,
"loss": 0.2816,
"step": 142
},
{
"epoch": 1.34,
"learning_rate": 4.914688813507798e-06,
"loss": 0.2787,
"step": 144
},
{
"epoch": 1.36,
"learning_rate": 4.661766220352098e-06,
"loss": 0.2801,
"step": 146
},
{
"epoch": 1.38,
"learning_rate": 4.413532341963477e-06,
"loss": 0.271,
"step": 148
},
{
"epoch": 1.4,
"learning_rate": 4.170205208855281e-06,
"loss": 0.2708,
"step": 150
},
{
"epoch": 1.42,
"learning_rate": 3.931998541814069e-06,
"loss": 0.2728,
"step": 152
},
{
"epoch": 1.43,
"learning_rate": 3.6991215641828903e-06,
"loss": 0.2719,
"step": 154
},
{
"epoch": 1.45,
"learning_rate": 3.4717788180947855e-06,
"loss": 0.272,
"step": 156
},
{
"epoch": 1.47,
"learning_rate": 3.250169984817897e-06,
"loss": 0.2724,
"step": 158
},
{
"epoch": 1.49,
"learning_rate": 3.0344897093700333e-06,
"loss": 0.2744,
"step": 160
},
{
"epoch": 1.51,
"learning_rate": 2.8249274295566863e-06,
"loss": 0.2736,
"step": 162
},
{
"epoch": 1.53,
"learning_rate": 2.6216672095827267e-06,
"loss": 0.2681,
"step": 164
},
{
"epoch": 1.55,
"learning_rate": 2.424887578383799e-06,
"loss": 0.2703,
"step": 166
},
{
"epoch": 1.56,
"learning_rate": 2.234761372819577e-06,
"loss": 0.2649,
"step": 168
},
{
"epoch": 1.58,
"learning_rate": 2.0514555858664663e-06,
"loss": 0.2781,
"step": 170
},
{
"epoch": 1.6,
"learning_rate": 1.875131219943187e-06,
"loss": 0.2645,
"step": 172
},
{
"epoch": 1.62,
"learning_rate": 1.7059431454979825e-06,
"loss": 0.27,
"step": 174
},
{
"epoch": 1.64,
"learning_rate": 1.5440399649817384e-06,
"loss": 0.2683,
"step": 176
},
{
"epoch": 1.66,
"learning_rate": 1.3895638823264447e-06,
"loss": 0.2645,
"step": 178
},
{
"epoch": 1.68,
"learning_rate": 1.2426505780436326e-06,
"loss": 0.2712,
"step": 180
},
{
"epoch": 1.69,
"learning_rate": 1.1034290900525279e-06,
"loss": 0.2621,
"step": 182
},
{
"epoch": 1.71,
"learning_rate": 9.720217003425648e-07,
"loss": 0.268,
"step": 184
},
{
"epoch": 1.73,
"learning_rate": 8.485438275698154e-07,
"loss": 0.2641,
"step": 186
},
{
"epoch": 1.75,
"learning_rate": 7.331039256816664e-07,
"loss": 0.2623,
"step": 188
},
{
"epoch": 1.77,
"learning_rate": 6.258033886587911e-07,
"loss": 0.2714,
"step": 190
},
{
"epoch": 1.79,
"learning_rate": 5.267364614580861e-07,
"loss": 0.2723,
"step": 192
},
{
"epoch": 1.81,
"learning_rate": 4.359901572347758e-07,
"loss": 0.2611,
"step": 194
},
{
"epoch": 1.83,
"learning_rate": 3.5364418091641374e-07,
"loss": 0.2724,
"step": 196
},
{
"epoch": 1.84,
"learning_rate": 2.7977085919589253e-07,
"loss": 0.2738,
"step": 198
},
{
"epoch": 1.86,
"learning_rate": 2.1443507700495968e-07,
"loss": 0.2623,
"step": 200
},
{
"epoch": 1.88,
"learning_rate": 1.5769422052403172e-07,
"loss": 0.2695,
"step": 202
},
{
"epoch": 1.9,
"learning_rate": 1.0959812677835968e-07,
"loss": 0.2661,
"step": 204
},
{
"epoch": 1.92,
"learning_rate": 7.018903986483083e-08,
"loss": 0.2712,
"step": 206
},
{
"epoch": 1.94,
"learning_rate": 3.950157384783104e-08,
"loss": 0.2706,
"step": 208
},
{
"epoch": 1.96,
"learning_rate": 1.7562682356786488e-08,
"loss": 0.276,
"step": 210
},
{
"epoch": 1.97,
"learning_rate": 4.39163491205652e-09,
"loss": 0.2767,
"step": 212
},
{
"epoch": 1.99,
"learning_rate": 0.0,
"loss": 0.2624,
"step": 214
}
],
"max_steps": 214,
"num_train_epochs": 2,
"total_flos": 1536113535614976.0,
"trial_name": null,
"trial_params": null
}