{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7748934521503293, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012914890869172156, "grad_norm": 0.9891569018363953, "learning_rate": 1.974170218261656e-05, "loss": 1.6595, "step": 100 }, { "epoch": 0.025829781738344312, "grad_norm": 0.24901358783245087, "learning_rate": 1.9483404365233117e-05, "loss": 1.4216, "step": 200 }, { "epoch": 0.03874467260751647, "grad_norm": 1.108109951019287, "learning_rate": 1.922510654784967e-05, "loss": 1.383, "step": 300 }, { "epoch": 0.051659563476688625, "grad_norm": 0.7391151785850525, "learning_rate": 1.896680873046623e-05, "loss": 1.408, "step": 400 }, { "epoch": 0.06457445434586077, "grad_norm": 0.9414256811141968, "learning_rate": 1.8708510913082787e-05, "loss": 1.3439, "step": 500 }, { "epoch": 0.07748934521503294, "grad_norm": 0.7522476315498352, "learning_rate": 1.845021309569934e-05, "loss": 1.272, "step": 600 }, { "epoch": 0.09040423608420509, "grad_norm": 0.32411837577819824, "learning_rate": 1.81919152783159e-05, "loss": 1.16, "step": 700 }, { "epoch": 0.10331912695337725, "grad_norm": 0.8460651636123657, "learning_rate": 1.7933617460932457e-05, "loss": 1.1753, "step": 800 }, { "epoch": 0.1162340178225494, "grad_norm": 1.0593210458755493, "learning_rate": 1.7675319643549015e-05, "loss": 1.2417, "step": 900 }, { "epoch": 0.12914890869172155, "grad_norm": 1.2058868408203125, "learning_rate": 1.741702182616557e-05, "loss": 1.381, "step": 1000 }, { "epoch": 0.1420637995608937, "grad_norm": 0.36752256751060486, "learning_rate": 1.7158724008782127e-05, "loss": 1.2031, "step": 1100 }, { "epoch": 0.15497869043006587, "grad_norm": 0.3617095351219177, "learning_rate": 1.6900426191398685e-05, "loss": 1.2186, "step": 1200 }, { "epoch": 0.167893581299238, "grad_norm": 1.2740339040756226, "learning_rate": 1.664212837401524e-05, "loss": 1.2354, "step": 1300 }, { "epoch": 0.18080847216841017, "grad_norm": 1.0895512104034424, "learning_rate": 1.6383830556631797e-05, "loss": 1.1308, "step": 1400 }, { "epoch": 0.19372336303758234, "grad_norm": 0.28641125559806824, "learning_rate": 1.6125532739248355e-05, "loss": 1.1459, "step": 1500 }, { "epoch": 0.2066382539067545, "grad_norm": 0.7633489966392517, "learning_rate": 1.5867234921864912e-05, "loss": 1.083, "step": 1600 }, { "epoch": 0.21955314477592663, "grad_norm": 0.9854117035865784, "learning_rate": 1.5608937104481467e-05, "loss": 1.1526, "step": 1700 }, { "epoch": 0.2324680356450988, "grad_norm": 1.0833749771118164, "learning_rate": 1.5350639287098025e-05, "loss": 1.0866, "step": 1800 }, { "epoch": 0.24538292651427096, "grad_norm": 1.2186298370361328, "learning_rate": 1.5092341469714582e-05, "loss": 1.0898, "step": 1900 }, { "epoch": 0.2582978173834431, "grad_norm": 0.976441502571106, "learning_rate": 1.4834043652331138e-05, "loss": 1.2018, "step": 2000 }, { "epoch": 0.2712127082526153, "grad_norm": 1.0837169885635376, "learning_rate": 1.4575745834947696e-05, "loss": 1.1264, "step": 2100 }, { "epoch": 0.2841275991217874, "grad_norm": 0.8157379031181335, "learning_rate": 1.4317448017564252e-05, "loss": 1.2216, "step": 2200 }, { "epoch": 0.29704248999095956, "grad_norm": 0.4470981955528259, "learning_rate": 1.4059150200180808e-05, "loss": 1.1543, "step": 2300 }, { "epoch": 0.30995738086013175, "grad_norm": 0.9872229695320129, "learning_rate": 1.3800852382797368e-05, "loss": 1.1293, "step": 2400 }, { "epoch": 0.3228722717293039, "grad_norm": 1.2851194143295288, "learning_rate": 1.3542554565413924e-05, "loss": 1.1619, "step": 2500 }, { "epoch": 0.335787162598476, "grad_norm": 1.0529409646987915, "learning_rate": 1.328425674803048e-05, "loss": 1.0468, "step": 2600 }, { "epoch": 0.3487020534676482, "grad_norm": 1.2335134744644165, "learning_rate": 1.3025958930647038e-05, "loss": 1.1292, "step": 2700 }, { "epoch": 0.36161694433682035, "grad_norm": 1.165204405784607, "learning_rate": 1.2767661113263594e-05, "loss": 1.0324, "step": 2800 }, { "epoch": 0.37453183520599254, "grad_norm": 0.4545508623123169, "learning_rate": 1.250936329588015e-05, "loss": 1.0776, "step": 2900 }, { "epoch": 0.38744672607516467, "grad_norm": 1.1217703819274902, "learning_rate": 1.225106547849671e-05, "loss": 1.0673, "step": 3000 }, { "epoch": 0.4003616169443368, "grad_norm": 1.204528570175171, "learning_rate": 1.1992767661113265e-05, "loss": 1.1664, "step": 3100 }, { "epoch": 0.413276507813509, "grad_norm": 1.8890794515609741, "learning_rate": 1.1734469843729821e-05, "loss": 1.0947, "step": 3200 }, { "epoch": 0.42619139868268113, "grad_norm": 1.4399609565734863, "learning_rate": 1.147617202634638e-05, "loss": 1.115, "step": 3300 }, { "epoch": 0.43910628955185327, "grad_norm": 0.8998225331306458, "learning_rate": 1.1217874208962935e-05, "loss": 1.0569, "step": 3400 }, { "epoch": 0.45202118042102546, "grad_norm": 1.2747713327407837, "learning_rate": 1.0959576391579491e-05, "loss": 1.0758, "step": 3500 }, { "epoch": 0.4649360712901976, "grad_norm": 1.2398267984390259, "learning_rate": 1.0701278574196047e-05, "loss": 1.0722, "step": 3600 }, { "epoch": 0.47785096215936973, "grad_norm": 0.7766038179397583, "learning_rate": 1.0442980756812607e-05, "loss": 1.1556, "step": 3700 }, { "epoch": 0.4907658530285419, "grad_norm": 1.2244646549224854, "learning_rate": 1.0184682939429163e-05, "loss": 1.1066, "step": 3800 }, { "epoch": 0.5036807438977141, "grad_norm": 1.1265727281570435, "learning_rate": 9.926385122045719e-06, "loss": 1.0832, "step": 3900 }, { "epoch": 0.5165956347668862, "grad_norm": 1.4965732097625732, "learning_rate": 9.668087304662275e-06, "loss": 1.1945, "step": 4000 }, { "epoch": 0.5295105256360584, "grad_norm": 1.170291543006897, "learning_rate": 9.409789487278833e-06, "loss": 1.079, "step": 4100 }, { "epoch": 0.5424254165052306, "grad_norm": 0.47167250514030457, "learning_rate": 9.15149166989539e-06, "loss": 1.0811, "step": 4200 }, { "epoch": 0.5553403073744027, "grad_norm": 1.5530108213424683, "learning_rate": 8.893193852511947e-06, "loss": 1.0456, "step": 4300 }, { "epoch": 0.5682551982435748, "grad_norm": 1.426579475402832, "learning_rate": 8.634896035128504e-06, "loss": 1.0797, "step": 4400 }, { "epoch": 0.581170089112747, "grad_norm": 1.656485676765442, "learning_rate": 8.37659821774506e-06, "loss": 1.1384, "step": 4500 }, { "epoch": 0.5940849799819191, "grad_norm": 1.8213322162628174, "learning_rate": 8.118300400361617e-06, "loss": 1.0484, "step": 4600 }, { "epoch": 0.6069998708510913, "grad_norm": 0.9146257042884827, "learning_rate": 7.860002582978174e-06, "loss": 1.0798, "step": 4700 }, { "epoch": 0.6199147617202635, "grad_norm": 0.8420510292053223, "learning_rate": 7.601704765594732e-06, "loss": 1.1371, "step": 4800 }, { "epoch": 0.6328296525894356, "grad_norm": 1.728549599647522, "learning_rate": 7.343406948211288e-06, "loss": 1.1104, "step": 4900 }, { "epoch": 0.6457445434586078, "grad_norm": 0.6792052388191223, "learning_rate": 7.085109130827845e-06, "loss": 1.1497, "step": 5000 }, { "epoch": 0.65865943432778, "grad_norm": 1.167297124862671, "learning_rate": 6.826811313444401e-06, "loss": 1.1146, "step": 5100 }, { "epoch": 0.671574325196952, "grad_norm": 0.5329355001449585, "learning_rate": 6.568513496060959e-06, "loss": 1.0569, "step": 5200 }, { "epoch": 0.6844892160661242, "grad_norm": 1.3186862468719482, "learning_rate": 6.310215678677516e-06, "loss": 1.023, "step": 5300 }, { "epoch": 0.6974041069352964, "grad_norm": 1.4624109268188477, "learning_rate": 6.051917861294072e-06, "loss": 1.1174, "step": 5400 }, { "epoch": 0.7103189978044685, "grad_norm": 1.1120996475219727, "learning_rate": 5.79362004391063e-06, "loss": 1.048, "step": 5500 }, { "epoch": 0.7232338886736407, "grad_norm": 1.1070384979248047, "learning_rate": 5.535322226527187e-06, "loss": 1.1, "step": 5600 }, { "epoch": 0.7361487795428129, "grad_norm": 0.46000921726226807, "learning_rate": 5.277024409143743e-06, "loss": 1.0562, "step": 5700 }, { "epoch": 0.7490636704119851, "grad_norm": 1.4388511180877686, "learning_rate": 5.0187265917603005e-06, "loss": 1.0263, "step": 5800 }, { "epoch": 0.7619785612811572, "grad_norm": 1.0894064903259277, "learning_rate": 4.7604287743768566e-06, "loss": 1.0688, "step": 5900 }, { "epoch": 0.7748934521503293, "grad_norm": 0.36538398265838623, "learning_rate": 4.5021309569934135e-06, "loss": 1.0626, "step": 6000 } ], "logging_steps": 100, "max_steps": 7743, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.988164550656e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }