{ "best_metric": 0.7384209036827087, "best_model_checkpoint": "Phi-3.5-mini-instruct_text_to_sql_vera\\checkpoint-2000", "epoch": 3.0, "eval_steps": 50, "global_step": 2013, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07451564828614009, "grad_norm": 0.24109168350696564, "learning_rate": 0.0002, "loss": 1.6897, "step": 50 }, { "epoch": 0.07451564828614009, "eval_loss": 1.628873586654663, "eval_runtime": 21.2268, "eval_samples_per_second": 5.842, "eval_steps_per_second": 0.754, "step": 50 }, { "epoch": 0.14903129657228018, "grad_norm": 0.20446562767028809, "learning_rate": 0.0001996800092633612, "loss": 1.4635, "step": 100 }, { "epoch": 0.14903129657228018, "eval_loss": 1.3007014989852905, "eval_runtime": 21.2354, "eval_samples_per_second": 5.839, "eval_steps_per_second": 0.753, "step": 100 }, { "epoch": 0.22354694485842028, "grad_norm": 0.15634676814079285, "learning_rate": 0.00019872208493487546, "loss": 1.1822, "step": 150 }, { "epoch": 0.22354694485842028, "eval_loss": 1.1080372333526611, "eval_runtime": 21.2322, "eval_samples_per_second": 5.84, "eval_steps_per_second": 0.754, "step": 150 }, { "epoch": 0.29806259314456035, "grad_norm": 0.060426872223615646, "learning_rate": 0.0001971323575527731, "loss": 1.0493, "step": 200 }, { "epoch": 0.29806259314456035, "eval_loss": 1.023650884628296, "eval_runtime": 23.9023, "eval_samples_per_second": 5.188, "eval_steps_per_second": 0.669, "step": 200 }, { "epoch": 0.37257824143070045, "grad_norm": 0.04907995089888573, "learning_rate": 0.0001949210010777752, "loss": 0.9908, "step": 250 }, { "epoch": 0.37257824143070045, "eval_loss": 0.9761372208595276, "eval_runtime": 22.3409, "eval_samples_per_second": 5.55, "eval_steps_per_second": 0.716, "step": 250 }, { "epoch": 0.44709388971684055, "grad_norm": 0.03899872675538063, "learning_rate": 0.00019210216778162994, "loss": 0.9539, "step": 300 }, { "epoch": 0.44709388971684055, "eval_loss": 0.9452499151229858, "eval_runtime": 22.4213, "eval_samples_per_second": 5.53, "eval_steps_per_second": 0.714, "step": 300 }, { "epoch": 0.5216095380029806, "grad_norm": 0.039957139641046524, "learning_rate": 0.0001886938976751951, "loss": 0.9301, "step": 350 }, { "epoch": 0.5216095380029806, "eval_loss": 0.9224702715873718, "eval_runtime": 22.4498, "eval_samples_per_second": 5.523, "eval_steps_per_second": 0.713, "step": 350 }, { "epoch": 0.5961251862891207, "grad_norm": 0.025905804708600044, "learning_rate": 0.00018471800305571129, "loss": 0.9065, "step": 400 }, { "epoch": 0.5961251862891207, "eval_loss": 0.9051175713539124, "eval_runtime": 21.5395, "eval_samples_per_second": 5.757, "eval_steps_per_second": 0.743, "step": 400 }, { "epoch": 0.6706408345752608, "grad_norm": 0.020609112456440926, "learning_rate": 0.00018019992891214008, "loss": 0.8916, "step": 450 }, { "epoch": 0.6706408345752608, "eval_loss": 0.8914369940757751, "eval_runtime": 21.6547, "eval_samples_per_second": 5.726, "eval_steps_per_second": 0.739, "step": 450 }, { "epoch": 0.7451564828614009, "grad_norm": 0.01633381098508835, "learning_rate": 0.00017516859008194938, "loss": 0.8799, "step": 500 }, { "epoch": 0.7451564828614009, "eval_loss": 0.880242109298706, "eval_runtime": 21.6819, "eval_samples_per_second": 5.719, "eval_steps_per_second": 0.738, "step": 500 }, { "epoch": 0.819672131147541, "grad_norm": 0.015875136479735374, "learning_rate": 0.00016965618620151017, "loss": 0.8691, "step": 550 }, { "epoch": 0.819672131147541, "eval_loss": 0.8673625588417053, "eval_runtime": 23.0842, "eval_samples_per_second": 5.372, "eval_steps_per_second": 0.693, "step": 550 }, { "epoch": 0.8941877794336811, "grad_norm": 0.015761110931634903, "learning_rate": 0.00016369799563438958, "loss": 0.8574, "step": 600 }, { "epoch": 0.8941877794336811, "eval_loss": 0.8584086894989014, "eval_runtime": 21.4811, "eval_samples_per_second": 5.773, "eval_steps_per_second": 0.745, "step": 600 }, { "epoch": 0.9687034277198212, "grad_norm": 0.013795309700071812, "learning_rate": 0.00015733214969635968, "loss": 0.854, "step": 650 }, { "epoch": 0.9687034277198212, "eval_loss": 0.8501254916191101, "eval_runtime": 21.7846, "eval_samples_per_second": 5.692, "eval_steps_per_second": 0.734, "step": 650 }, { "epoch": 1.0432190760059612, "grad_norm": 0.01314204279333353, "learning_rate": 0.00015059938862204127, "loss": 0.8436, "step": 700 }, { "epoch": 1.0432190760059612, "eval_loss": 0.8421516418457031, "eval_runtime": 21.4589, "eval_samples_per_second": 5.778, "eval_steps_per_second": 0.746, "step": 700 }, { "epoch": 1.1177347242921014, "grad_norm": 0.014487664215266705, "learning_rate": 0.00014354280083495006, "loss": 0.8402, "step": 750 }, { "epoch": 1.1177347242921014, "eval_loss": 0.8345963358879089, "eval_runtime": 21.5388, "eval_samples_per_second": 5.757, "eval_steps_per_second": 0.743, "step": 750 }, { "epoch": 1.1922503725782414, "grad_norm": 0.012593415565788746, "learning_rate": 0.000136207547189569, "loss": 0.8302, "step": 800 }, { "epoch": 1.1922503725782414, "eval_loss": 0.8273870348930359, "eval_runtime": 21.494, "eval_samples_per_second": 5.769, "eval_steps_per_second": 0.744, "step": 800 }, { "epoch": 1.2667660208643814, "grad_norm": 0.011890560388565063, "learning_rate": 0.00012864057195024643, "loss": 0.8222, "step": 850 }, { "epoch": 1.2667660208643814, "eval_loss": 0.8204250931739807, "eval_runtime": 21.6934, "eval_samples_per_second": 5.716, "eval_steps_per_second": 0.738, "step": 850 }, { "epoch": 1.3412816691505216, "grad_norm": 0.01334091741591692, "learning_rate": 0.00012089030235660155, "loss": 0.81, "step": 900 }, { "epoch": 1.3412816691505216, "eval_loss": 0.8137892484664917, "eval_runtime": 22.3403, "eval_samples_per_second": 5.55, "eval_steps_per_second": 0.716, "step": 900 }, { "epoch": 1.4157973174366618, "grad_norm": 0.012158791534602642, "learning_rate": 0.00011300633869816275, "loss": 0.8081, "step": 950 }, { "epoch": 1.4157973174366618, "eval_loss": 0.807340681552887, "eval_runtime": 21.5174, "eval_samples_per_second": 5.763, "eval_steps_per_second": 0.744, "step": 950 }, { "epoch": 1.4903129657228018, "grad_norm": 0.011660942807793617, "learning_rate": 0.00010503913688170396, "loss": 0.7994, "step": 1000 }, { "epoch": 1.4903129657228018, "eval_loss": 0.8010362982749939, "eval_runtime": 21.55, "eval_samples_per_second": 5.754, "eval_steps_per_second": 0.742, "step": 1000 }, { "epoch": 1.5648286140089418, "grad_norm": 0.010989319533109665, "learning_rate": 9.703968552278915e-05, "loss": 0.7958, "step": 1050 }, { "epoch": 1.5648286140089418, "eval_loss": 0.7949212789535522, "eval_runtime": 21.402, "eval_samples_per_second": 5.794, "eval_steps_per_second": 0.748, "step": 1050 }, { "epoch": 1.639344262295082, "grad_norm": 0.011820629239082336, "learning_rate": 8.905917962807927e-05, "loss": 0.7893, "step": 1100 }, { "epoch": 1.639344262295082, "eval_loss": 0.7889001965522766, "eval_runtime": 21.7151, "eval_samples_per_second": 5.71, "eval_steps_per_second": 0.737, "step": 1100 }, { "epoch": 1.713859910581222, "grad_norm": 0.011526989750564098, "learning_rate": 8.114869295677425e-05, "loss": 0.7812, "step": 1150 }, { "epoch": 1.713859910581222, "eval_loss": 0.7832813858985901, "eval_runtime": 21.1842, "eval_samples_per_second": 5.853, "eval_steps_per_second": 0.755, "step": 1150 }, { "epoch": 1.788375558867362, "grad_norm": 0.013003380037844181, "learning_rate": 7.335885115801656e-05, "loss": 0.7846, "step": 1200 }, { "epoch": 1.788375558867362, "eval_loss": 0.7778643369674683, "eval_runtime": 21.7578, "eval_samples_per_second": 5.699, "eval_steps_per_second": 0.735, "step": 1200 }, { "epoch": 1.8628912071535022, "grad_norm": 0.011233557015657425, "learning_rate": 6.573950777611587e-05, "loss": 0.7695, "step": 1250 }, { "epoch": 1.8628912071535022, "eval_loss": 0.7728994488716125, "eval_runtime": 21.828, "eval_samples_per_second": 5.681, "eval_steps_per_second": 0.733, "step": 1250 }, { "epoch": 1.9374068554396424, "grad_norm": 0.011392244137823582, "learning_rate": 5.833942519710177e-05, "loss": 0.7697, "step": 1300 }, { "epoch": 1.9374068554396424, "eval_loss": 0.7685559988021851, "eval_runtime": 21.1933, "eval_samples_per_second": 5.851, "eval_steps_per_second": 0.755, "step": 1300 }, { "epoch": 2.0119225037257826, "grad_norm": 0.01131096575409174, "learning_rate": 5.1205962578487155e-05, "loss": 0.7606, "step": 1350 }, { "epoch": 2.0119225037257826, "eval_loss": 0.764735996723175, "eval_runtime": 21.1907, "eval_samples_per_second": 5.852, "eval_steps_per_second": 0.755, "step": 1350 }, { "epoch": 2.0864381520119224, "grad_norm": 0.012029374949634075, "learning_rate": 4.4384772759434425e-05, "loss": 0.7619, "step": 1400 }, { "epoch": 2.0864381520119224, "eval_loss": 0.7614848613739014, "eval_runtime": 21.1958, "eval_samples_per_second": 5.85, "eval_steps_per_second": 0.755, "step": 1400 }, { "epoch": 2.1609538002980626, "grad_norm": 0.011891283094882965, "learning_rate": 3.7919510091042566e-05, "loss": 0.7569, "step": 1450 }, { "epoch": 2.1609538002980626, "eval_loss": 0.7588434815406799, "eval_runtime": 21.1878, "eval_samples_per_second": 5.852, "eval_steps_per_second": 0.755, "step": 1450 }, { "epoch": 2.235469448584203, "grad_norm": 0.012809407897293568, "learning_rate": 3.185155105658798e-05, "loss": 0.7526, "step": 1500 }, { "epoch": 2.235469448584203, "eval_loss": 0.7565779089927673, "eval_runtime": 21.1534, "eval_samples_per_second": 5.862, "eval_steps_per_second": 0.756, "step": 1500 }, { "epoch": 2.3099850968703426, "grad_norm": 0.01183687336742878, "learning_rate": 2.6219729469697318e-05, "loss": 0.7504, "step": 1550 }, { "epoch": 2.3099850968703426, "eval_loss": 0.7436912655830383, "eval_runtime": 21.8493, "eval_samples_per_second": 5.858, "eval_steps_per_second": 0.732, "step": 1550 }, { "epoch": 2.384500745156483, "grad_norm": 0.011195004917681217, "learning_rate": 2.1060087945134677e-05, "loss": 0.7553, "step": 1600 }, { "epoch": 2.384500745156483, "eval_loss": 0.7422465682029724, "eval_runtime": 21.8534, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.732, "step": 1600 }, { "epoch": 2.459016393442623, "grad_norm": 0.010784263722598553, "learning_rate": 1.640564723274486e-05, "loss": 0.7507, "step": 1650 }, { "epoch": 2.459016393442623, "eval_loss": 0.7410176396369934, "eval_runtime": 21.8273, "eval_samples_per_second": 5.864, "eval_steps_per_second": 0.733, "step": 1650 }, { "epoch": 2.533532041728763, "grad_norm": 0.011086920276284218, "learning_rate": 1.2286194890771819e-05, "loss": 0.7456, "step": 1700 }, { "epoch": 2.533532041728763, "eval_loss": 0.7401486039161682, "eval_runtime": 21.8582, "eval_samples_per_second": 5.856, "eval_steps_per_second": 0.732, "step": 1700 }, { "epoch": 2.608047690014903, "grad_norm": 0.011054403148591518, "learning_rate": 8.728094651004648e-06, "loss": 0.7448, "step": 1750 }, { "epoch": 2.608047690014903, "eval_loss": 0.7393685579299927, "eval_runtime": 21.8448, "eval_samples_per_second": 5.86, "eval_steps_per_second": 0.732, "step": 1750 }, { "epoch": 2.682563338301043, "grad_norm": 0.010735660791397095, "learning_rate": 5.7541176957785605e-06, "loss": 0.7487, "step": 1800 }, { "epoch": 2.682563338301043, "eval_loss": 0.7390720248222351, "eval_runtime": 22.2346, "eval_samples_per_second": 5.757, "eval_steps_per_second": 0.72, "step": 1800 }, { "epoch": 2.7570789865871834, "grad_norm": 0.011423379182815552, "learning_rate": 3.3832969266265112e-06, "loss": 0.7476, "step": 1850 }, { "epoch": 2.7570789865871834, "eval_loss": 0.7386873960494995, "eval_runtime": 21.9986, "eval_samples_per_second": 5.819, "eval_steps_per_second": 0.727, "step": 1850 }, { "epoch": 2.8315946348733236, "grad_norm": 0.012488750740885735, "learning_rate": 1.6308051572357152e-06, "loss": 0.749, "step": 1900 }, { "epoch": 2.8315946348733236, "eval_loss": 0.7386685609817505, "eval_runtime": 22.2521, "eval_samples_per_second": 5.752, "eval_steps_per_second": 0.719, "step": 1900 }, { "epoch": 2.9061102831594634, "grad_norm": 0.010972470976412296, "learning_rate": 5.078580102509834e-07, "loss": 0.7461, "step": 1950 }, { "epoch": 2.9061102831594634, "eval_loss": 0.7385600805282593, "eval_runtime": 23.4712, "eval_samples_per_second": 5.453, "eval_steps_per_second": 0.682, "step": 1950 }, { "epoch": 2.9806259314456036, "grad_norm": 0.012074621394276619, "learning_rate": 2.164213936770576e-08, "loss": 0.7486, "step": 2000 }, { "epoch": 2.9806259314456036, "eval_loss": 0.7384209036827087, "eval_runtime": 22.0471, "eval_samples_per_second": 5.806, "eval_steps_per_second": 0.726, "step": 2000 } ], "logging_steps": 50, "max_steps": 2013, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.365572698728038e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }