{ "best_metric": 1.2517019510269165, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaGuanaco/checkpoint-600", "epoch": 1.8810620162633487, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 5.9999999999999995e-05, "loss": 1.5389, "step": 20 }, { "epoch": 0.13, "learning_rate": 0.00011999999999999999, "loss": 1.418, "step": 40 }, { "epoch": 0.19, "learning_rate": 0.00017999999999999998, "loss": 1.3622, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00023999999999999998, "loss": 1.3334, "step": 80 }, { "epoch": 0.31, "learning_rate": 0.0003, "loss": 1.3109, "step": 100 }, { "epoch": 0.38, "learning_rate": 0.00029297423887587816, "loss": 1.3046, "step": 120 }, { "epoch": 0.44, "learning_rate": 0.0002859484777517564, "loss": 1.2948, "step": 140 }, { "epoch": 0.5, "learning_rate": 0.00027892271662763465, "loss": 1.2885, "step": 160 }, { "epoch": 0.56, "learning_rate": 0.00027189695550351283, "loss": 1.2817, "step": 180 }, { "epoch": 0.63, "learning_rate": 0.0002648711943793911, "loss": 1.2706, "step": 200 }, { "epoch": 0.63, "eval_loss": 1.2975808382034302, "eval_runtime": 40.2385, "eval_samples_per_second": 49.704, "eval_steps_per_second": 0.795, "step": 200 }, { "epoch": 0.69, "learning_rate": 0.00025784543325526926, "loss": 1.2668, "step": 220 }, { "epoch": 0.75, "learning_rate": 0.00025081967213114756, "loss": 1.2699, "step": 240 }, { "epoch": 0.82, "learning_rate": 0.00024379391100702575, "loss": 1.2554, "step": 260 }, { "epoch": 0.88, "learning_rate": 0.00023676814988290396, "loss": 1.2508, "step": 280 }, { "epoch": 0.94, "learning_rate": 0.00022974238875878218, "loss": 1.2556, "step": 300 }, { "epoch": 1.0, "learning_rate": 0.00022271662763466042, "loss": 1.2497, "step": 320 }, { "epoch": 1.07, "learning_rate": 0.00021569086651053863, "loss": 1.2468, "step": 340 }, { "epoch": 1.13, "learning_rate": 0.00020866510538641685, "loss": 1.2428, "step": 360 }, { "epoch": 1.19, "learning_rate": 0.00020163934426229506, "loss": 1.2397, "step": 380 }, { "epoch": 1.25, "learning_rate": 0.0001946135831381733, "loss": 1.2474, "step": 400 }, { "epoch": 1.25, "eval_loss": 1.2675851583480835, "eval_runtime": 40.2072, "eval_samples_per_second": 49.742, "eval_steps_per_second": 0.796, "step": 400 }, { "epoch": 1.32, "learning_rate": 0.00018758782201405152, "loss": 1.2407, "step": 420 }, { "epoch": 1.38, "learning_rate": 0.00018056206088992973, "loss": 1.2348, "step": 440 }, { "epoch": 1.44, "learning_rate": 0.00017353629976580795, "loss": 1.228, "step": 460 }, { "epoch": 1.5, "learning_rate": 0.00016651053864168616, "loss": 1.233, "step": 480 }, { "epoch": 1.57, "learning_rate": 0.0001594847775175644, "loss": 1.2303, "step": 500 }, { "epoch": 1.63, "learning_rate": 0.00015245901639344262, "loss": 1.2255, "step": 520 }, { "epoch": 1.69, "learning_rate": 0.00014543325526932083, "loss": 1.2191, "step": 540 }, { "epoch": 1.76, "learning_rate": 0.00013840749414519905, "loss": 1.2211, "step": 560 }, { "epoch": 1.82, "learning_rate": 0.00013138173302107726, "loss": 1.2188, "step": 580 }, { "epoch": 1.88, "learning_rate": 0.0001243559718969555, "loss": 1.2184, "step": 600 }, { "epoch": 1.88, "eval_loss": 1.2517019510269165, "eval_runtime": 40.3057, "eval_samples_per_second": 49.621, "eval_steps_per_second": 0.794, "step": 600 } ], "max_steps": 954, "num_train_epochs": 3, "total_flos": 8.806186092274385e+18, "trial_name": null, "trial_params": null }