{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04878048780487805, "grad_norm": 0.4036892056465149, "learning_rate": 0.00019999478113897612, "loss": 1.0282, "step": 10 }, { "epoch": 0.0975609756097561, "grad_norm": 0.3629762828350067, "learning_rate": 0.0001999791251006346, "loss": 0.7875, "step": 20 }, { "epoch": 0.14634146341463414, "grad_norm": 0.4877622425556183, "learning_rate": 0.0001999530335191093, "loss": 0.5942, "step": 30 }, { "epoch": 0.1951219512195122, "grad_norm": 0.4466260075569153, "learning_rate": 0.00019991650911776695, "loss": 0.3866, "step": 40 }, { "epoch": 0.24390243902439024, "grad_norm": 0.649118959903717, "learning_rate": 0.000199869555708923, "loss": 0.3928, "step": 50 }, { "epoch": 0.2926829268292683, "grad_norm": 0.8762800097465515, "learning_rate": 0.0001998121781934438, "loss": 0.3258, "step": 60 }, { "epoch": 0.34146341463414637, "grad_norm": 0.9195622801780701, "learning_rate": 0.0001997443825602349, "loss": 0.2885, "step": 70 }, { "epoch": 0.3902439024390244, "grad_norm": 0.5856262445449829, "learning_rate": 0.00019966617588561609, "loss": 0.2888, "step": 80 }, { "epoch": 0.43902439024390244, "grad_norm": 0.5520443320274353, "learning_rate": 0.00019957756633258265, "loss": 0.2242, "step": 90 }, { "epoch": 0.4878048780487805, "grad_norm": 0.9435800909996033, "learning_rate": 0.00019947856314995349, "loss": 0.1629, "step": 100 }, { "epoch": 0.5365853658536586, "grad_norm": 0.9416623115539551, "learning_rate": 0.00019936917667140555, "loss": 0.1555, "step": 110 }, { "epoch": 0.5853658536585366, "grad_norm": 0.802065372467041, "learning_rate": 0.0001992494183143955, "loss": 0.1339, "step": 120 }, { "epoch": 0.6341463414634146, "grad_norm": 0.7007794380187988, "learning_rate": 0.00019911930057896774, "loss": 0.1191, "step": 130 }, { "epoch": 0.6829268292682927, "grad_norm": 0.6755990386009216, "learning_rate": 0.00019897883704644983, "loss": 0.1571, "step": 140 }, { "epoch": 0.7317073170731707, "grad_norm": 1.6951078176498413, "learning_rate": 0.00019882804237803488, "loss": 0.1309, "step": 150 }, { "epoch": 0.7804878048780488, "grad_norm": 0.567158579826355, "learning_rate": 0.0001986669323132512, "loss": 0.0766, "step": 160 }, { "epoch": 0.8292682926829268, "grad_norm": 0.8820038437843323, "learning_rate": 0.0001984955236683196, "loss": 0.0839, "step": 170 }, { "epoch": 0.8780487804878049, "grad_norm": 0.6520794034004211, "learning_rate": 0.00019831383433439797, "loss": 0.0863, "step": 180 }, { "epoch": 0.926829268292683, "grad_norm": 0.45519864559173584, "learning_rate": 0.00019812188327571399, "loss": 0.0889, "step": 190 }, { "epoch": 0.975609756097561, "grad_norm": 0.614235520362854, "learning_rate": 0.00019791969052758562, "loss": 0.0725, "step": 200 }, { "epoch": 1.024390243902439, "grad_norm": 0.2764686644077301, "learning_rate": 0.00019770727719432994, "loss": 0.0407, "step": 210 }, { "epoch": 1.0731707317073171, "grad_norm": 0.6082726716995239, "learning_rate": 0.00019748466544706022, "loss": 0.044, "step": 220 }, { "epoch": 1.1219512195121952, "grad_norm": 0.9295619130134583, "learning_rate": 0.00019725187852137195, "loss": 0.0675, "step": 230 }, { "epoch": 1.170731707317073, "grad_norm": 0.3758924603462219, "learning_rate": 0.00019700894071491732, "loss": 0.0439, "step": 240 }, { "epoch": 1.2195121951219512, "grad_norm": 0.46514585614204407, "learning_rate": 0.00019675587738486936, "loss": 0.0398, "step": 250 }, { "epoch": 1.2682926829268293, "grad_norm": 0.5870018005371094, "learning_rate": 0.0001964927149452751, "loss": 0.0406, "step": 260 }, { "epoch": 1.3170731707317074, "grad_norm": 0.30292996764183044, "learning_rate": 0.00019621948086429844, "loss": 0.028, "step": 270 }, { "epoch": 1.3658536585365852, "grad_norm": 0.47037121653556824, "learning_rate": 0.00019593620366135337, "loss": 0.0239, "step": 280 }, { "epoch": 1.4146341463414633, "grad_norm": 0.4176475405693054, "learning_rate": 0.00019564291290412688, "loss": 0.0281, "step": 290 }, { "epoch": 1.4634146341463414, "grad_norm": 0.3179157078266144, "learning_rate": 0.00019533963920549306, "loss": 0.0281, "step": 300 }, { "epoch": 1.5121951219512195, "grad_norm": 0.5817562937736511, "learning_rate": 0.00019502641422031763, "loss": 0.0296, "step": 310 }, { "epoch": 1.5609756097560976, "grad_norm": 0.7409655451774597, "learning_rate": 0.00019470327064215383, "loss": 0.029, "step": 320 }, { "epoch": 1.6097560975609757, "grad_norm": 0.4418310225009918, "learning_rate": 0.00019437024219983028, "loss": 0.0583, "step": 330 }, { "epoch": 1.6585365853658538, "grad_norm": 0.31637728214263916, "learning_rate": 0.0001940273636539301, "loss": 0.0354, "step": 340 }, { "epoch": 1.7073170731707317, "grad_norm": 0.22175493836402893, "learning_rate": 0.00019367467079316279, "loss": 0.0514, "step": 350 }, { "epoch": 1.7560975609756098, "grad_norm": 0.6636152267456055, "learning_rate": 0.00019331220043062894, "loss": 0.034, "step": 360 }, { "epoch": 1.8048780487804879, "grad_norm": 0.8424332141876221, "learning_rate": 0.00019293999039997746, "loss": 0.0299, "step": 370 }, { "epoch": 1.8536585365853657, "grad_norm": 0.6435155272483826, "learning_rate": 0.00019255807955145677, "loss": 0.0508, "step": 380 }, { "epoch": 1.9024390243902438, "grad_norm": 0.7734220027923584, "learning_rate": 0.00019216650774785972, "loss": 0.035, "step": 390 }, { "epoch": 1.951219512195122, "grad_norm": 0.2854250967502594, "learning_rate": 0.0001917653158603628, "loss": 0.0339, "step": 400 }, { "epoch": 2.0, "grad_norm": 0.6165639758110046, "learning_rate": 0.0001913545457642601, "loss": 0.0323, "step": 410 } ], "logging_steps": 10, "max_steps": 3075, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.64317874233344e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }