{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.974025974025974, |
|
"eval_steps": 39, |
|
"global_step": 308, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006493506493506494, |
|
"grad_norm": 0.39616659283638, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.6277, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006493506493506494, |
|
"eval_loss": 1.6379814147949219, |
|
"eval_runtime": 25.2293, |
|
"eval_samples_per_second": 11.693, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012987012987012988, |
|
"grad_norm": 0.42422759532928467, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.6549, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01948051948051948, |
|
"grad_norm": 0.40566202998161316, |
|
"learning_rate": 3e-06, |
|
"loss": 1.6115, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.025974025974025976, |
|
"grad_norm": 0.3940100371837616, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.617, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.032467532467532464, |
|
"grad_norm": 0.3876812756061554, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6561, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03896103896103896, |
|
"grad_norm": 0.41130709648132324, |
|
"learning_rate": 6e-06, |
|
"loss": 1.606, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 0.3654179573059082, |
|
"learning_rate": 7e-06, |
|
"loss": 1.605, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05194805194805195, |
|
"grad_norm": 0.35875341296195984, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.5651, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05844155844155844, |
|
"grad_norm": 0.3530851900577545, |
|
"learning_rate": 9e-06, |
|
"loss": 1.5201, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06493506493506493, |
|
"grad_norm": 0.38295701146125793, |
|
"learning_rate": 1e-05, |
|
"loss": 1.6376, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 0.39765068888664246, |
|
"learning_rate": 9.999722154604716e-06, |
|
"loss": 1.5167, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07792207792207792, |
|
"grad_norm": 0.44107645750045776, |
|
"learning_rate": 9.99888864929809e-06, |
|
"loss": 1.6259, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08441558441558442, |
|
"grad_norm": 0.4228634238243103, |
|
"learning_rate": 9.997499576714369e-06, |
|
"loss": 1.6767, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 0.4122574031352997, |
|
"learning_rate": 9.995555091232516e-06, |
|
"loss": 1.6082, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09740259740259741, |
|
"grad_norm": 0.43088310956954956, |
|
"learning_rate": 9.99305540895907e-06, |
|
"loss": 1.6651, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1038961038961039, |
|
"grad_norm": 0.4325619637966156, |
|
"learning_rate": 9.990000807704114e-06, |
|
"loss": 1.6473, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11038961038961038, |
|
"grad_norm": 0.4201916456222534, |
|
"learning_rate": 9.986391626950405e-06, |
|
"loss": 1.5726, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11688311688311688, |
|
"grad_norm": 0.4616028070449829, |
|
"learning_rate": 9.982228267815644e-06, |
|
"loss": 1.5994, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12337662337662338, |
|
"grad_norm": 0.4239194989204407, |
|
"learning_rate": 9.977511193007896e-06, |
|
"loss": 1.6018, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12987012987012986, |
|
"grad_norm": 0.41312310099601746, |
|
"learning_rate": 9.972240926774167e-06, |
|
"loss": 1.6497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 0.45023512840270996, |
|
"learning_rate": 9.966418054842143e-06, |
|
"loss": 1.557, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.4155130982398987, |
|
"learning_rate": 9.960043224355081e-06, |
|
"loss": 1.5782, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.14935064935064934, |
|
"grad_norm": 0.39791709184646606, |
|
"learning_rate": 9.9531171437999e-06, |
|
"loss": 1.548, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.15584415584415584, |
|
"grad_norm": 0.40294748544692993, |
|
"learning_rate": 9.945640582928438e-06, |
|
"loss": 1.5745, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16233766233766234, |
|
"grad_norm": 0.3911401927471161, |
|
"learning_rate": 9.937614372671896e-06, |
|
"loss": 1.5653, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16883116883116883, |
|
"grad_norm": 0.407604843378067, |
|
"learning_rate": 9.929039405048502e-06, |
|
"loss": 1.5627, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.17532467532467533, |
|
"grad_norm": 0.4176672697067261, |
|
"learning_rate": 9.919916633064363e-06, |
|
"loss": 1.5413, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 0.40910884737968445, |
|
"learning_rate": 9.91024707060755e-06, |
|
"loss": 1.5688, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18831168831168832, |
|
"grad_norm": 0.393100768327713, |
|
"learning_rate": 9.900031792335432e-06, |
|
"loss": 1.5259, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.19480519480519481, |
|
"grad_norm": 0.39668065309524536, |
|
"learning_rate": 9.889271933555214e-06, |
|
"loss": 1.5471, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2012987012987013, |
|
"grad_norm": 0.37264811992645264, |
|
"learning_rate": 9.877968690097785e-06, |
|
"loss": 1.488, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2077922077922078, |
|
"grad_norm": 0.39069730043411255, |
|
"learning_rate": 9.866123318184803e-06, |
|
"loss": 1.5863, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 0.37075284123420715, |
|
"learning_rate": 9.853737134289086e-06, |
|
"loss": 1.4967, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.22077922077922077, |
|
"grad_norm": 0.37381497025489807, |
|
"learning_rate": 9.840811514988294e-06, |
|
"loss": 1.4219, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 0.37528884410858154, |
|
"learning_rate": 9.827347896811954e-06, |
|
"loss": 1.4754, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23376623376623376, |
|
"grad_norm": 0.3899977505207062, |
|
"learning_rate": 9.81334777608179e-06, |
|
"loss": 1.5201, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24025974025974026, |
|
"grad_norm": 0.37340793013572693, |
|
"learning_rate": 9.798812708745431e-06, |
|
"loss": 1.4467, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.24675324675324675, |
|
"grad_norm": 0.3682302236557007, |
|
"learning_rate": 9.783744310203492e-06, |
|
"loss": 1.3911, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2532467532467532, |
|
"grad_norm": 0.37759163975715637, |
|
"learning_rate": 9.76814425513003e-06, |
|
"loss": 1.4242, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2532467532467532, |
|
"eval_loss": 1.451162338256836, |
|
"eval_runtime": 25.2272, |
|
"eval_samples_per_second": 11.694, |
|
"eval_steps_per_second": 1.467, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2597402597402597, |
|
"grad_norm": 0.3705059885978699, |
|
"learning_rate": 9.752014277286433e-06, |
|
"loss": 1.4542, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2662337662337662, |
|
"grad_norm": 0.34180477261543274, |
|
"learning_rate": 9.73535616932873e-06, |
|
"loss": 1.4108, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 0.34464818239212036, |
|
"learning_rate": 9.718171782608355e-06, |
|
"loss": 1.4691, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2792207792207792, |
|
"grad_norm": 0.37641096115112305, |
|
"learning_rate": 9.7004630269664e-06, |
|
"loss": 1.3784, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.335043340921402, |
|
"learning_rate": 9.682231870521347e-06, |
|
"loss": 1.4899, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2922077922077922, |
|
"grad_norm": 0.3437371551990509, |
|
"learning_rate": 9.663480339450344e-06, |
|
"loss": 1.4003, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2987012987012987, |
|
"grad_norm": 0.3729651868343353, |
|
"learning_rate": 9.644210517764014e-06, |
|
"loss": 1.3884, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3051948051948052, |
|
"grad_norm": 0.33896854519844055, |
|
"learning_rate": 9.624424547074851e-06, |
|
"loss": 1.4258, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.3116883116883117, |
|
"grad_norm": 0.36020371317863464, |
|
"learning_rate": 9.60412462635919e-06, |
|
"loss": 1.3703, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 0.3405916392803192, |
|
"learning_rate": 9.583313011712832e-06, |
|
"loss": 1.3407, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3246753246753247, |
|
"grad_norm": 0.3447131812572479, |
|
"learning_rate": 9.561992016100293e-06, |
|
"loss": 1.3456, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33116883116883117, |
|
"grad_norm": 0.35361772775650024, |
|
"learning_rate": 9.540164009097756e-06, |
|
"loss": 1.3384, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.33766233766233766, |
|
"grad_norm": 0.3525960147380829, |
|
"learning_rate": 9.517831416629717e-06, |
|
"loss": 1.3726, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.34415584415584416, |
|
"grad_norm": 0.3269696831703186, |
|
"learning_rate": 9.494996720699363e-06, |
|
"loss": 1.3664, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.35064935064935066, |
|
"grad_norm": 0.3153081238269806, |
|
"learning_rate": 9.471662459112747e-06, |
|
"loss": 1.3448, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 0.3017883002758026, |
|
"learning_rate": 9.44783122519672e-06, |
|
"loss": 1.3228, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 0.314627081155777, |
|
"learning_rate": 9.423505667510724e-06, |
|
"loss": 1.3565, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.37012987012987014, |
|
"grad_norm": 0.3048722743988037, |
|
"learning_rate": 9.398688489552437e-06, |
|
"loss": 1.2669, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.37662337662337664, |
|
"grad_norm": 0.3107397258281708, |
|
"learning_rate": 9.373382449457305e-06, |
|
"loss": 1.2871, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38311688311688313, |
|
"grad_norm": 0.3061436116695404, |
|
"learning_rate": 9.347590359692015e-06, |
|
"loss": 1.3015, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.38961038961038963, |
|
"grad_norm": 0.31199324131011963, |
|
"learning_rate": 9.321315086741916e-06, |
|
"loss": 1.3196, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3961038961038961, |
|
"grad_norm": 0.3002881109714508, |
|
"learning_rate": 9.294559550792451e-06, |
|
"loss": 1.3404, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.4025974025974026, |
|
"grad_norm": 0.2965368330478668, |
|
"learning_rate": 9.2673267254046e-06, |
|
"loss": 1.2931, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 0.2912820279598236, |
|
"learning_rate": 9.23961963718442e-06, |
|
"loss": 1.2941, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.4155844155844156, |
|
"grad_norm": 0.29220762848854065, |
|
"learning_rate": 9.211441365446661e-06, |
|
"loss": 1.306, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.42207792207792205, |
|
"grad_norm": 0.2802380323410034, |
|
"learning_rate": 9.182795041872543e-06, |
|
"loss": 1.2641, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.28170621395111084, |
|
"learning_rate": 9.153683850161706e-06, |
|
"loss": 1.2459, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.43506493506493504, |
|
"grad_norm": 0.2919502854347229, |
|
"learning_rate": 9.124111025678378e-06, |
|
"loss": 1.2736, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.44155844155844154, |
|
"grad_norm": 0.2793984115123749, |
|
"learning_rate": 9.094079855091797e-06, |
|
"loss": 1.265, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.44805194805194803, |
|
"grad_norm": 0.2946363091468811, |
|
"learning_rate": 9.063593676010954e-06, |
|
"loss": 1.2669, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 0.2771857678890228, |
|
"learning_rate": 9.032655876613636e-06, |
|
"loss": 1.2479, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.461038961038961, |
|
"grad_norm": 0.26628848910331726, |
|
"learning_rate": 9.001269895269886e-06, |
|
"loss": 1.2764, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4675324675324675, |
|
"grad_norm": 0.3006434440612793, |
|
"learning_rate": 8.969439220159861e-06, |
|
"loss": 1.2286, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.474025974025974, |
|
"grad_norm": 0.27274397015571594, |
|
"learning_rate": 8.937167388886163e-06, |
|
"loss": 1.3059, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4805194805194805, |
|
"grad_norm": 0.2763414978981018, |
|
"learning_rate": 8.904457988080682e-06, |
|
"loss": 1.2095, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.487012987012987, |
|
"grad_norm": 0.28402575850486755, |
|
"learning_rate": 8.871314653005972e-06, |
|
"loss": 1.2601, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4935064935064935, |
|
"grad_norm": 0.2892557680606842, |
|
"learning_rate": 8.837741067151251e-06, |
|
"loss": 1.2342, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.2644696533679962, |
|
"learning_rate": 8.80374096182301e-06, |
|
"loss": 1.2744, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5064935064935064, |
|
"grad_norm": 0.25198492407798767, |
|
"learning_rate": 8.76931811573033e-06, |
|
"loss": 1.2885, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5064935064935064, |
|
"eval_loss": 1.2617864608764648, |
|
"eval_runtime": 25.3815, |
|
"eval_samples_per_second": 11.623, |
|
"eval_steps_per_second": 1.458, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.512987012987013, |
|
"grad_norm": 0.2804642915725708, |
|
"learning_rate": 8.734476354564924e-06, |
|
"loss": 1.2583, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5194805194805194, |
|
"grad_norm": 0.2729627788066864, |
|
"learning_rate": 8.699219550575954e-06, |
|
"loss": 1.2246, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.525974025974026, |
|
"grad_norm": 0.2520177662372589, |
|
"learning_rate": 8.663551622139674e-06, |
|
"loss": 1.2599, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5324675324675324, |
|
"grad_norm": 0.2614675760269165, |
|
"learning_rate": 8.627476533323957e-06, |
|
"loss": 1.2165, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.538961038961039, |
|
"grad_norm": 0.3191888928413391, |
|
"learning_rate": 8.590998293447728e-06, |
|
"loss": 1.2558, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 0.27159151434898376, |
|
"learning_rate": 8.554120956635375e-06, |
|
"loss": 1.2197, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.551948051948052, |
|
"grad_norm": 0.291990727186203, |
|
"learning_rate": 8.516848621366188e-06, |
|
"loss": 1.219, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5584415584415584, |
|
"grad_norm": 0.23849813640117645, |
|
"learning_rate": 8.47918543001886e-06, |
|
"loss": 1.2199, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.564935064935065, |
|
"grad_norm": 0.26883506774902344, |
|
"learning_rate": 8.441135568411102e-06, |
|
"loss": 1.1959, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.2667544484138489, |
|
"learning_rate": 8.402703265334455e-06, |
|
"loss": 1.2682, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.577922077922078, |
|
"grad_norm": 0.24013420939445496, |
|
"learning_rate": 8.363892792084291e-06, |
|
"loss": 1.1649, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5844155844155844, |
|
"grad_norm": 0.2544495463371277, |
|
"learning_rate": 8.324708461985124e-06, |
|
"loss": 1.1929, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 0.2654297351837158, |
|
"learning_rate": 8.285154629911227e-06, |
|
"loss": 1.18, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5974025974025974, |
|
"grad_norm": 0.26719850301742554, |
|
"learning_rate": 8.245235691802644e-06, |
|
"loss": 1.2933, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6038961038961039, |
|
"grad_norm": 0.2760712802410126, |
|
"learning_rate": 8.20495608417663e-06, |
|
"loss": 1.1612, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6103896103896104, |
|
"grad_norm": 0.2652733623981476, |
|
"learning_rate": 8.164320283634585e-06, |
|
"loss": 1.2125, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6168831168831169, |
|
"grad_norm": 0.24604123830795288, |
|
"learning_rate": 8.123332806364537e-06, |
|
"loss": 1.1801, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.6233766233766234, |
|
"grad_norm": 0.23077791929244995, |
|
"learning_rate": 8.081998207639212e-06, |
|
"loss": 1.2016, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6298701298701299, |
|
"grad_norm": 0.25489139556884766, |
|
"learning_rate": 8.040321081309783e-06, |
|
"loss": 1.2049, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 0.2564036250114441, |
|
"learning_rate": 7.998306059295302e-06, |
|
"loss": 1.2377, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 0.2734230160713196, |
|
"learning_rate": 7.955957811067932e-06, |
|
"loss": 1.1107, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6493506493506493, |
|
"grad_norm": 0.2672719359397888, |
|
"learning_rate": 7.913281043133978e-06, |
|
"loss": 1.1863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6558441558441559, |
|
"grad_norm": 0.263724684715271, |
|
"learning_rate": 7.870280498510824e-06, |
|
"loss": 1.2678, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6623376623376623, |
|
"grad_norm": 0.27098724246025085, |
|
"learning_rate": 7.826960956199796e-06, |
|
"loss": 1.1656, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6688311688311688, |
|
"grad_norm": 0.29257479310035706, |
|
"learning_rate": 7.783327230655036e-06, |
|
"loss": 1.1749, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6753246753246753, |
|
"grad_norm": 0.26874226331710815, |
|
"learning_rate": 7.739384171248436e-06, |
|
"loss": 1.2013, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.26934632658958435, |
|
"learning_rate": 7.695136661730677e-06, |
|
"loss": 1.1507, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6883116883116883, |
|
"grad_norm": 0.27807483077049255, |
|
"learning_rate": 7.650589619688468e-06, |
|
"loss": 1.1729, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6948051948051948, |
|
"grad_norm": 0.2936646342277527, |
|
"learning_rate": 7.6057479959980145e-06, |
|
"loss": 1.1646, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.7012987012987013, |
|
"grad_norm": 0.28149378299713135, |
|
"learning_rate": 7.560616774274775e-06, |
|
"loss": 1.1939, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7077922077922078, |
|
"grad_norm": 0.25706660747528076, |
|
"learning_rate": 7.5152009703196105e-06, |
|
"loss": 1.1708, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.29964110255241394, |
|
"learning_rate": 7.469505631561318e-06, |
|
"loss": 1.2161, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7207792207792207, |
|
"grad_norm": 0.24933487176895142, |
|
"learning_rate": 7.423535836495683e-06, |
|
"loss": 1.1641, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.27039459347724915, |
|
"learning_rate": 7.3772966941210585e-06, |
|
"loss": 1.1563, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7337662337662337, |
|
"grad_norm": 0.2490512579679489, |
|
"learning_rate": 7.33079334337056e-06, |
|
"loss": 1.1887, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.7402597402597403, |
|
"grad_norm": 0.28315550088882446, |
|
"learning_rate": 7.284030952540937e-06, |
|
"loss": 1.1189, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7467532467532467, |
|
"grad_norm": 0.2557179033756256, |
|
"learning_rate": 7.2370147187181736e-06, |
|
"loss": 1.1812, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7532467532467533, |
|
"grad_norm": 0.2875461280345917, |
|
"learning_rate": 7.189749867199899e-06, |
|
"loss": 1.1534, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7597402597402597, |
|
"grad_norm": 0.26117077469825745, |
|
"learning_rate": 7.142241650914654e-06, |
|
"loss": 1.1618, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7597402597402597, |
|
"eval_loss": 1.2008626461029053, |
|
"eval_runtime": 25.2533, |
|
"eval_samples_per_second": 11.682, |
|
"eval_steps_per_second": 1.465, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7662337662337663, |
|
"grad_norm": 0.29663676023483276, |
|
"learning_rate": 7.094495349838093e-06, |
|
"loss": 1.1064, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 0.23107394576072693, |
|
"learning_rate": 7.046516270406174e-06, |
|
"loss": 1.1464, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.7792207792207793, |
|
"grad_norm": 0.2502164840698242, |
|
"learning_rate": 6.998309744925411e-06, |
|
"loss": 1.1998, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 0.25331148505210876, |
|
"learning_rate": 6.9498811309802595e-06, |
|
"loss": 1.1784, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7922077922077922, |
|
"grad_norm": 0.2596096694469452, |
|
"learning_rate": 6.901235810837668e-06, |
|
"loss": 1.1034, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.7987012987012987, |
|
"grad_norm": 0.26797452569007874, |
|
"learning_rate": 6.852379190848923e-06, |
|
"loss": 1.1264, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.8051948051948052, |
|
"grad_norm": 0.31169766187667847, |
|
"learning_rate": 6.8033167008487784e-06, |
|
"loss": 1.1386, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8116883116883117, |
|
"grad_norm": 0.26767072081565857, |
|
"learning_rate": 6.754053793552005e-06, |
|
"loss": 1.2137, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 0.2596385180950165, |
|
"learning_rate": 6.704595943947385e-06, |
|
"loss": 1.1466, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8246753246753247, |
|
"grad_norm": 0.27891236543655396, |
|
"learning_rate": 6.654948648689228e-06, |
|
"loss": 1.1371, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.8311688311688312, |
|
"grad_norm": 0.28439176082611084, |
|
"learning_rate": 6.605117425486483e-06, |
|
"loss": 1.1948, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8376623376623377, |
|
"grad_norm": 0.2944129705429077, |
|
"learning_rate": 6.555107812489513e-06, |
|
"loss": 1.1169, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.8441558441558441, |
|
"grad_norm": 0.2609187960624695, |
|
"learning_rate": 6.504925367674595e-06, |
|
"loss": 1.1503, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8506493506493507, |
|
"grad_norm": 0.27614086866378784, |
|
"learning_rate": 6.454575668226215e-06, |
|
"loss": 1.1835, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.2971368134021759, |
|
"learning_rate": 6.40406430991723e-06, |
|
"loss": 1.1816, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 0.28436651825904846, |
|
"learning_rate": 6.353396906486971e-06, |
|
"loss": 1.1947, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.8701298701298701, |
|
"grad_norm": 0.2339404821395874, |
|
"learning_rate": 6.302579089017328e-06, |
|
"loss": 1.1027, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8766233766233766, |
|
"grad_norm": 0.27757248282432556, |
|
"learning_rate": 6.251616505306933e-06, |
|
"loss": 1.1294, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8831168831168831, |
|
"grad_norm": 0.27656033635139465, |
|
"learning_rate": 6.200514819243476e-06, |
|
"loss": 1.1313, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8896103896103896, |
|
"grad_norm": 0.26819008588790894, |
|
"learning_rate": 6.149279710174219e-06, |
|
"loss": 1.2036, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.8961038961038961, |
|
"grad_norm": 0.3008396029472351, |
|
"learning_rate": 6.097916872274815e-06, |
|
"loss": 1.1512, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9025974025974026, |
|
"grad_norm": 0.29651182889938354, |
|
"learning_rate": 6.046432013916467e-06, |
|
"loss": 1.1412, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.275259405374527, |
|
"learning_rate": 5.9948308570315e-06, |
|
"loss": 1.1726, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9155844155844156, |
|
"grad_norm": 0.26858457922935486, |
|
"learning_rate": 5.943119136477449e-06, |
|
"loss": 1.1701, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.922077922077922, |
|
"grad_norm": 0.273671954870224, |
|
"learning_rate": 5.891302599399686e-06, |
|
"loss": 1.165, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 0.26044774055480957, |
|
"learning_rate": 5.839387004592705e-06, |
|
"loss": 1.1119, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.935064935064935, |
|
"grad_norm": 0.24865947663784027, |
|
"learning_rate": 5.78737812186009e-06, |
|
"loss": 1.1598, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9415584415584416, |
|
"grad_norm": 0.2713409960269928, |
|
"learning_rate": 5.735281731373271e-06, |
|
"loss": 1.1543, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.948051948051948, |
|
"grad_norm": 0.2865453362464905, |
|
"learning_rate": 5.6831036230291345e-06, |
|
"loss": 1.1379, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 0.26891422271728516, |
|
"learning_rate": 5.630849595806534e-06, |
|
"loss": 1.1382, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.961038961038961, |
|
"grad_norm": 0.3001209795475006, |
|
"learning_rate": 5.578525457121807e-06, |
|
"loss": 1.1674, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9675324675324676, |
|
"grad_norm": 0.2672886848449707, |
|
"learning_rate": 5.526137022183356e-06, |
|
"loss": 1.1209, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.974025974025974, |
|
"grad_norm": 0.27608615159988403, |
|
"learning_rate": 5.473690113345343e-06, |
|
"loss": 1.1855, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9805194805194806, |
|
"grad_norm": 0.2823050618171692, |
|
"learning_rate": 5.4211905594606165e-06, |
|
"loss": 1.1433, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.987012987012987, |
|
"grad_norm": 0.2888166308403015, |
|
"learning_rate": 5.368644195232896e-06, |
|
"loss": 1.1413, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9935064935064936, |
|
"grad_norm": 0.2720174193382263, |
|
"learning_rate": 5.316056860568318e-06, |
|
"loss": 1.1657, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.28704148530960083, |
|
"learning_rate": 5.2634343999263985e-06, |
|
"loss": 1.1606, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.0064935064935066, |
|
"grad_norm": 0.2424069494009018, |
|
"learning_rate": 5.210782661670486e-06, |
|
"loss": 1.1506, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0129870129870129, |
|
"grad_norm": 0.2754787802696228, |
|
"learning_rate": 5.158107497417795e-06, |
|
"loss": 1.1186, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0129870129870129, |
|
"eval_loss": 1.1744325160980225, |
|
"eval_runtime": 25.2167, |
|
"eval_samples_per_second": 11.699, |
|
"eval_steps_per_second": 1.467, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0194805194805194, |
|
"grad_norm": 0.2637002170085907, |
|
"learning_rate": 5.105414761389056e-06, |
|
"loss": 1.178, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.025974025974026, |
|
"grad_norm": 0.26754647493362427, |
|
"learning_rate": 5.052710309757899e-06, |
|
"loss": 1.1329, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.0064935064935066, |
|
"grad_norm": 0.2765344977378845, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0933, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.0129870129870129, |
|
"grad_norm": 0.2661243677139282, |
|
"learning_rate": 4.947289690242103e-06, |
|
"loss": 1.0931, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0194805194805194, |
|
"grad_norm": 0.2913898527622223, |
|
"learning_rate": 4.894585238610946e-06, |
|
"loss": 1.0963, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.025974025974026, |
|
"grad_norm": 0.2579714357852936, |
|
"learning_rate": 4.841892502582206e-06, |
|
"loss": 1.1984, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.0324675324675325, |
|
"grad_norm": 0.2802336513996124, |
|
"learning_rate": 4.789217338329515e-06, |
|
"loss": 1.1592, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.0389610389610389, |
|
"grad_norm": 0.27885061502456665, |
|
"learning_rate": 4.736565600073602e-06, |
|
"loss": 1.1184, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 0.26521897315979004, |
|
"learning_rate": 4.683943139431683e-06, |
|
"loss": 1.1685, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.051948051948052, |
|
"grad_norm": 0.30475732684135437, |
|
"learning_rate": 4.631355804767106e-06, |
|
"loss": 1.105, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.0584415584415585, |
|
"grad_norm": 0.2804529070854187, |
|
"learning_rate": 4.578809440539386e-06, |
|
"loss": 1.0735, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0649350649350648, |
|
"grad_norm": 0.2721943259239197, |
|
"learning_rate": 4.526309886654659e-06, |
|
"loss": 1.187, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 0.2899441421031952, |
|
"learning_rate": 4.473862977816647e-06, |
|
"loss": 1.1375, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.077922077922078, |
|
"grad_norm": 0.27229487895965576, |
|
"learning_rate": 4.4214745428781946e-06, |
|
"loss": 1.1079, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0844155844155845, |
|
"grad_norm": 0.28253939747810364, |
|
"learning_rate": 4.369150404193467e-06, |
|
"loss": 1.1283, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 0.31305843591690063, |
|
"learning_rate": 4.316896376970866e-06, |
|
"loss": 1.1576, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0974025974025974, |
|
"grad_norm": 0.26900458335876465, |
|
"learning_rate": 4.264718268626729e-06, |
|
"loss": 1.1694, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.103896103896104, |
|
"grad_norm": 0.27618759870529175, |
|
"learning_rate": 4.212621878139912e-06, |
|
"loss": 1.0882, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.1103896103896105, |
|
"grad_norm": 0.3342863619327545, |
|
"learning_rate": 4.160612995407296e-06, |
|
"loss": 1.1103, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.1168831168831168, |
|
"grad_norm": 0.3191912770271301, |
|
"learning_rate": 4.108697400600316e-06, |
|
"loss": 1.1528, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.1233766233766234, |
|
"grad_norm": 0.28136956691741943, |
|
"learning_rate": 4.056880863522553e-06, |
|
"loss": 1.1239, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.12987012987013, |
|
"grad_norm": 0.3044598400592804, |
|
"learning_rate": 4.005169142968503e-06, |
|
"loss": 1.1396, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 0.26966431736946106, |
|
"learning_rate": 3.953567986083535e-06, |
|
"loss": 1.1166, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.2642230689525604, |
|
"learning_rate": 3.902083127725186e-06, |
|
"loss": 1.1156, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1493506493506493, |
|
"grad_norm": 0.27723589539527893, |
|
"learning_rate": 3.850720289825783e-06, |
|
"loss": 1.106, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.155844155844156, |
|
"grad_norm": 0.25195732712745667, |
|
"learning_rate": 3.799485180756526e-06, |
|
"loss": 1.1473, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.1623376623376624, |
|
"grad_norm": 0.2634832561016083, |
|
"learning_rate": 3.7483834946930682e-06, |
|
"loss": 1.12, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.1688311688311688, |
|
"grad_norm": 0.2719174027442932, |
|
"learning_rate": 3.6974209109826724e-06, |
|
"loss": 1.1379, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.1753246753246753, |
|
"grad_norm": 0.2815384566783905, |
|
"learning_rate": 3.64660309351303e-06, |
|
"loss": 1.1548, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 0.2685678005218506, |
|
"learning_rate": 3.595935690082769e-06, |
|
"loss": 1.1321, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.1883116883116882, |
|
"grad_norm": 0.282886803150177, |
|
"learning_rate": 3.545424331773787e-06, |
|
"loss": 1.159, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.1948051948051948, |
|
"grad_norm": 0.2961716055870056, |
|
"learning_rate": 3.495074632325407e-06, |
|
"loss": 1.0941, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.2012987012987013, |
|
"grad_norm": 0.26541033387184143, |
|
"learning_rate": 3.4448921875104898e-06, |
|
"loss": 1.1487, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.2077922077922079, |
|
"grad_norm": 0.3059718608856201, |
|
"learning_rate": 3.3948825745135196e-06, |
|
"loss": 1.1529, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 0.2965088486671448, |
|
"learning_rate": 3.345051351310774e-06, |
|
"loss": 1.0652, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.2207792207792207, |
|
"grad_norm": 0.2668808102607727, |
|
"learning_rate": 3.295404056052616e-06, |
|
"loss": 1.1231, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 0.2890554666519165, |
|
"learning_rate": 3.2459462064479972e-06, |
|
"loss": 1.15, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.2337662337662338, |
|
"grad_norm": 0.25789421796798706, |
|
"learning_rate": 3.1966832991512232e-06, |
|
"loss": 1.1434, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.2402597402597402, |
|
"grad_norm": 0.28866487741470337, |
|
"learning_rate": 3.147620809151078e-06, |
|
"loss": 1.1237, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.2402597402597402, |
|
"eval_loss": 1.1614325046539307, |
|
"eval_runtime": 25.3147, |
|
"eval_samples_per_second": 11.653, |
|
"eval_steps_per_second": 1.462, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.2467532467532467, |
|
"grad_norm": 0.26898378133773804, |
|
"learning_rate": 3.098764189162332e-06, |
|
"loss": 1.1265, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.2532467532467533, |
|
"grad_norm": 0.2718038260936737, |
|
"learning_rate": 3.0501188690197418e-06, |
|
"loss": 1.1012, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.2597402597402598, |
|
"grad_norm": 0.27878862619400024, |
|
"learning_rate": 3.0016902550745896e-06, |
|
"loss": 1.1454, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.2662337662337662, |
|
"grad_norm": 0.28224632143974304, |
|
"learning_rate": 2.9534837295938268e-06, |
|
"loss": 1.1561, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 0.2997869849205017, |
|
"learning_rate": 2.9055046501619088e-06, |
|
"loss": 1.1408, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2792207792207793, |
|
"grad_norm": 0.29859447479248047, |
|
"learning_rate": 2.857758349085348e-06, |
|
"loss": 1.1105, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 0.2930034101009369, |
|
"learning_rate": 2.810250132800103e-06, |
|
"loss": 1.1113, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.2922077922077921, |
|
"grad_norm": 0.29845744371414185, |
|
"learning_rate": 2.762985281281828e-06, |
|
"loss": 1.1702, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.2987012987012987, |
|
"grad_norm": 0.27331170439720154, |
|
"learning_rate": 2.715969047459066e-06, |
|
"loss": 1.1197, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.3051948051948052, |
|
"grad_norm": 0.31652095913887024, |
|
"learning_rate": 2.6692066566294393e-06, |
|
"loss": 1.1422, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.3116883116883118, |
|
"grad_norm": 0.3164924383163452, |
|
"learning_rate": 2.622703305878941e-06, |
|
"loss": 1.1179, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 0.25467604398727417, |
|
"learning_rate": 2.5764641635043174e-06, |
|
"loss": 1.0839, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.3246753246753247, |
|
"grad_norm": 0.2731601297855377, |
|
"learning_rate": 2.530494368438683e-06, |
|
"loss": 1.1033, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.3311688311688312, |
|
"grad_norm": 0.2869837284088135, |
|
"learning_rate": 2.4847990296803907e-06, |
|
"loss": 1.1179, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.3376623376623376, |
|
"grad_norm": 0.2819492816925049, |
|
"learning_rate": 2.4393832257252253e-06, |
|
"loss": 1.012, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.344155844155844, |
|
"grad_norm": 0.267045259475708, |
|
"learning_rate": 2.394252004001989e-06, |
|
"loss": 1.0931, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.3506493506493507, |
|
"grad_norm": 0.32312628626823425, |
|
"learning_rate": 2.349410380311532e-06, |
|
"loss": 1.1414, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 0.25884246826171875, |
|
"learning_rate": 2.304863338269326e-06, |
|
"loss": 1.126, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.3011741638183594, |
|
"learning_rate": 2.2606158287515662e-06, |
|
"loss": 1.1247, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.37012987012987, |
|
"grad_norm": 0.2756052017211914, |
|
"learning_rate": 2.216672769344965e-06, |
|
"loss": 1.1196, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3766233766233766, |
|
"grad_norm": 0.2793034613132477, |
|
"learning_rate": 2.1730390438002056e-06, |
|
"loss": 1.1372, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.3831168831168832, |
|
"grad_norm": 0.2940700948238373, |
|
"learning_rate": 2.129719501489177e-06, |
|
"loss": 1.0846, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.3896103896103895, |
|
"grad_norm": 0.30984777212142944, |
|
"learning_rate": 2.086718956866024e-06, |
|
"loss": 1.1404, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.396103896103896, |
|
"grad_norm": 0.25352832674980164, |
|
"learning_rate": 2.044042188932068e-06, |
|
"loss": 1.1223, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.4025974025974026, |
|
"grad_norm": 0.31949445605278015, |
|
"learning_rate": 2.0016939407046987e-06, |
|
"loss": 1.0899, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 0.2721538245677948, |
|
"learning_rate": 1.9596789186902184e-06, |
|
"loss": 1.0571, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.4155844155844157, |
|
"grad_norm": 0.2586475610733032, |
|
"learning_rate": 1.9180017923607884e-06, |
|
"loss": 1.0164, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.422077922077922, |
|
"grad_norm": 0.29858991503715515, |
|
"learning_rate": 1.8766671936354647e-06, |
|
"loss": 1.155, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.31114456057548523, |
|
"learning_rate": 1.8356797163654172e-06, |
|
"loss": 1.1546, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.435064935064935, |
|
"grad_norm": 0.27457138895988464, |
|
"learning_rate": 1.795043915823373e-06, |
|
"loss": 1.1159, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.4415584415584415, |
|
"grad_norm": 0.31147733330726624, |
|
"learning_rate": 1.754764308197358e-06, |
|
"loss": 1.1316, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.448051948051948, |
|
"grad_norm": 0.30539748072624207, |
|
"learning_rate": 1.7148453700887747e-06, |
|
"loss": 1.1135, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 0.2620432674884796, |
|
"learning_rate": 1.6752915380148772e-06, |
|
"loss": 1.136, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.4610389610389611, |
|
"grad_norm": 0.3202950060367584, |
|
"learning_rate": 1.6361072079157092e-06, |
|
"loss": 1.1635, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.4675324675324675, |
|
"grad_norm": 0.3076479732990265, |
|
"learning_rate": 1.5972967346655449e-06, |
|
"loss": 1.1305, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.474025974025974, |
|
"grad_norm": 0.2635696828365326, |
|
"learning_rate": 1.5588644315888978e-06, |
|
"loss": 1.0887, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.4805194805194806, |
|
"grad_norm": 0.30404505133628845, |
|
"learning_rate": 1.5208145699811417e-06, |
|
"loss": 1.1176, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.487012987012987, |
|
"grad_norm": 0.29464152455329895, |
|
"learning_rate": 1.4831513786338126e-06, |
|
"loss": 1.1724, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.4935064935064934, |
|
"grad_norm": 0.3045382499694824, |
|
"learning_rate": 1.4458790433646264e-06, |
|
"loss": 1.1151, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.4935064935064934, |
|
"eval_loss": 1.1553453207015991, |
|
"eval_runtime": 25.2983, |
|
"eval_samples_per_second": 11.661, |
|
"eval_steps_per_second": 1.463, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.2871938943862915, |
|
"learning_rate": 1.4090017065522731e-06, |
|
"loss": 1.1687, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.5064935064935066, |
|
"grad_norm": 0.2857874035835266, |
|
"learning_rate": 1.3725234666760428e-06, |
|
"loss": 1.1089, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.512987012987013, |
|
"grad_norm": 0.3428025245666504, |
|
"learning_rate": 1.3364483778603272e-06, |
|
"loss": 1.1335, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.5194805194805194, |
|
"grad_norm": 0.3187818229198456, |
|
"learning_rate": 1.3007804494240478e-06, |
|
"loss": 1.163, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.525974025974026, |
|
"grad_norm": 0.28887903690338135, |
|
"learning_rate": 1.2655236454350777e-06, |
|
"loss": 1.119, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.5324675324675323, |
|
"grad_norm": 0.28905734419822693, |
|
"learning_rate": 1.2306818842696716e-06, |
|
"loss": 1.1448, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5389610389610389, |
|
"grad_norm": 0.25675976276397705, |
|
"learning_rate": 1.1962590381769923e-06, |
|
"loss": 1.1513, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 0.2782272696495056, |
|
"learning_rate": 1.1622589328487505e-06, |
|
"loss": 1.0953, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.551948051948052, |
|
"grad_norm": 0.2892334461212158, |
|
"learning_rate": 1.128685346994028e-06, |
|
"loss": 1.1721, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.5584415584415585, |
|
"grad_norm": 0.31845828890800476, |
|
"learning_rate": 1.09554201191932e-06, |
|
"loss": 1.1076, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.564935064935065, |
|
"grad_norm": 0.27880439162254333, |
|
"learning_rate": 1.0628326111138377e-06, |
|
"loss": 1.1691, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.2526175081729889, |
|
"learning_rate": 1.03056077984014e-06, |
|
"loss": 1.1443, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.577922077922078, |
|
"grad_norm": 0.2798580527305603, |
|
"learning_rate": 9.98730104730115e-07, |
|
"loss": 1.1156, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.5844155844155843, |
|
"grad_norm": 0.3181169927120209, |
|
"learning_rate": 9.673441233863661e-07, |
|
"loss": 1.0997, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 0.2808782756328583, |
|
"learning_rate": 9.364063239890476e-07, |
|
"loss": 1.1658, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.5974025974025974, |
|
"grad_norm": 0.2726938724517822, |
|
"learning_rate": 9.059201449082045e-07, |
|
"loss": 1.0976, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.603896103896104, |
|
"grad_norm": 0.29625144600868225, |
|
"learning_rate": 8.758889743216247e-07, |
|
"loss": 1.1165, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.6103896103896105, |
|
"grad_norm": 0.29083919525146484, |
|
"learning_rate": 8.463161498382949e-07, |
|
"loss": 1.158, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.616883116883117, |
|
"grad_norm": 0.27802902460098267, |
|
"learning_rate": 8.172049581274571e-07, |
|
"loss": 1.1136, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.6233766233766234, |
|
"grad_norm": 0.2722671627998352, |
|
"learning_rate": 7.885586345533397e-07, |
|
"loss": 1.1543, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.62987012987013, |
|
"grad_norm": 0.26996558904647827, |
|
"learning_rate": 7.603803628155821e-07, |
|
"loss": 1.1196, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 0.2988632023334503, |
|
"learning_rate": 7.326732745954001e-07, |
|
"loss": 1.1126, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 0.2721429467201233, |
|
"learning_rate": 7.054404492075512e-07, |
|
"loss": 1.1334, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.6493506493506493, |
|
"grad_norm": 0.2960205078125, |
|
"learning_rate": 6.786849132580841e-07, |
|
"loss": 1.1289, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.655844155844156, |
|
"grad_norm": 0.2805614173412323, |
|
"learning_rate": 6.524096403079861e-07, |
|
"loss": 1.1566, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.6623376623376624, |
|
"grad_norm": 0.26631271839141846, |
|
"learning_rate": 6.266175505426958e-07, |
|
"loss": 1.1385, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6688311688311688, |
|
"grad_norm": 0.2959340810775757, |
|
"learning_rate": 6.013115104475653e-07, |
|
"loss": 1.061, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.6753246753246753, |
|
"grad_norm": 0.3008975088596344, |
|
"learning_rate": 5.76494332489278e-07, |
|
"loss": 1.1383, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 0.28491881489753723, |
|
"learning_rate": 5.521687748032805e-07, |
|
"loss": 1.1827, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.6883116883116882, |
|
"grad_norm": 0.2625648081302643, |
|
"learning_rate": 5.283375408872538e-07, |
|
"loss": 1.1496, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.6948051948051948, |
|
"grad_norm": 0.29310908913612366, |
|
"learning_rate": 5.05003279300637e-07, |
|
"loss": 1.1256, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.7012987012987013, |
|
"grad_norm": 0.28792184591293335, |
|
"learning_rate": 4.82168583370285e-07, |
|
"loss": 1.1269, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.7077922077922079, |
|
"grad_norm": 0.3059506118297577, |
|
"learning_rate": 4.598359909022443e-07, |
|
"loss": 1.1275, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.2944253087043762, |
|
"learning_rate": 4.380079838997087e-07, |
|
"loss": 1.0612, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.7207792207792207, |
|
"grad_norm": 0.3492392599582672, |
|
"learning_rate": 4.1668698828716994e-07, |
|
"loss": 1.1408, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 0.2833608090877533, |
|
"learning_rate": 3.958753736408105e-07, |
|
"loss": 1.1345, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7337662337662336, |
|
"grad_norm": 0.30626702308654785, |
|
"learning_rate": 3.7557545292514987e-07, |
|
"loss": 1.1533, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.7402597402597402, |
|
"grad_norm": 0.2807121276855469, |
|
"learning_rate": 3.557894822359864e-07, |
|
"loss": 1.1166, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.7467532467532467, |
|
"grad_norm": 0.31310421228408813, |
|
"learning_rate": 3.3651966054965734e-07, |
|
"loss": 1.1238, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.7467532467532467, |
|
"eval_loss": 1.153025507926941, |
|
"eval_runtime": 25.365, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.459, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.7532467532467533, |
|
"grad_norm": 0.29958927631378174, |
|
"learning_rate": 3.177681294786539e-07, |
|
"loss": 1.1579, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.7597402597402598, |
|
"grad_norm": 0.3180530071258545, |
|
"learning_rate": 2.995369730336012e-07, |
|
"loss": 1.0656, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.7662337662337664, |
|
"grad_norm": 0.30499908328056335, |
|
"learning_rate": 2.8182821739164534e-07, |
|
"loss": 1.0744, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 0.2803168296813965, |
|
"learning_rate": 2.6464383067127175e-07, |
|
"loss": 1.0652, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.7792207792207793, |
|
"grad_norm": 0.2866427004337311, |
|
"learning_rate": 2.479857227135685e-07, |
|
"loss": 1.1175, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.26658734679222107, |
|
"learning_rate": 2.3185574486997264e-07, |
|
"loss": 1.1494, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.7922077922077921, |
|
"grad_norm": 0.3132326602935791, |
|
"learning_rate": 2.1625568979651012e-07, |
|
"loss": 1.0939, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7987012987012987, |
|
"grad_norm": 0.29077062010765076, |
|
"learning_rate": 2.0118729125457036e-07, |
|
"loss": 1.1686, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.8051948051948052, |
|
"grad_norm": 0.2711000144481659, |
|
"learning_rate": 1.866522239182117e-07, |
|
"loss": 1.1246, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.8116883116883118, |
|
"grad_norm": 0.28556856513023376, |
|
"learning_rate": 1.7265210318804683e-07, |
|
"loss": 1.0459, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 0.30168288946151733, |
|
"learning_rate": 1.5918848501170647e-07, |
|
"loss": 1.1087, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.8246753246753247, |
|
"grad_norm": 0.30205830931663513, |
|
"learning_rate": 1.4626286571091664e-07, |
|
"loss": 1.1333, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.8311688311688312, |
|
"grad_norm": 0.2765739858150482, |
|
"learning_rate": 1.338766818151982e-07, |
|
"loss": 1.0988, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.8376623376623376, |
|
"grad_norm": 0.3049326241016388, |
|
"learning_rate": 1.2203130990221635e-07, |
|
"loss": 1.0979, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.844155844155844, |
|
"grad_norm": 0.2744823694229126, |
|
"learning_rate": 1.107280664447874e-07, |
|
"loss": 1.1496, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.8506493506493507, |
|
"grad_norm": 0.2835923731327057, |
|
"learning_rate": 9.996820766456916e-08, |
|
"loss": 1.0894, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.31562480330467224, |
|
"learning_rate": 8.975292939244928e-08, |
|
"loss": 1.1204, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 0.26750344038009644, |
|
"learning_rate": 8.008336693563823e-08, |
|
"loss": 1.1243, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.87012987012987, |
|
"grad_norm": 0.26439955830574036, |
|
"learning_rate": 7.096059495149855e-08, |
|
"loss": 1.1027, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.8766233766233766, |
|
"grad_norm": 0.2618618905544281, |
|
"learning_rate": 6.238562732810427e-08, |
|
"loss": 1.0328, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.883116883116883, |
|
"grad_norm": 0.3233661949634552, |
|
"learning_rate": 5.435941707156389e-08, |
|
"loss": 1.1159, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.8896103896103895, |
|
"grad_norm": 0.28882384300231934, |
|
"learning_rate": 4.6882856200101135e-08, |
|
"loss": 1.1039, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.896103896103896, |
|
"grad_norm": 0.3103022277355194, |
|
"learning_rate": 3.99567756449204e-08, |
|
"loss": 1.1315, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.9025974025974026, |
|
"grad_norm": 0.2719733715057373, |
|
"learning_rate": 3.358194515785784e-08, |
|
"loss": 1.1128, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 0.266874223947525, |
|
"learning_rate": 2.77590732258326e-08, |
|
"loss": 1.1333, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.9155844155844157, |
|
"grad_norm": 0.2862412631511688, |
|
"learning_rate": 2.2488806992105317e-08, |
|
"loss": 1.1387, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.922077922077922, |
|
"grad_norm": 0.3009204864501953, |
|
"learning_rate": 1.7771732184357905e-08, |
|
"loss": 1.1328, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 0.28494757413864136, |
|
"learning_rate": 1.3608373049596724e-08, |
|
"loss": 1.1189, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.935064935064935, |
|
"grad_norm": 0.26805561780929565, |
|
"learning_rate": 9.999192295886973e-09, |
|
"loss": 1.0664, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.9415584415584415, |
|
"grad_norm": 0.3299916088581085, |
|
"learning_rate": 6.944591040930481e-09, |
|
"loss": 1.1117, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.948051948051948, |
|
"grad_norm": 0.2890622913837433, |
|
"learning_rate": 4.444908767484712e-09, |
|
"loss": 1.1396, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 0.28758740425109863, |
|
"learning_rate": 2.500423285632381e-09, |
|
"loss": 1.1277, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.9610389610389611, |
|
"grad_norm": 0.2983172535896301, |
|
"learning_rate": 1.111350701909486e-09, |
|
"loss": 1.1452, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.9675324675324677, |
|
"grad_norm": 0.30814212560653687, |
|
"learning_rate": 2.7784539528397104e-10, |
|
"loss": 1.0888, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.974025974025974, |
|
"grad_norm": 0.2840464115142822, |
|
"learning_rate": 0.0, |
|
"loss": 1.1205, |
|
"step": 308 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 308, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 77, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.703755799815258e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |