{ "best_metric": 1.2436273097991943, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaGuanaco/checkpoint-800", "epoch": 2.5080826883511316, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 5.9999999999999995e-05, "loss": 1.5389, "step": 20 }, { "epoch": 0.13, "learning_rate": 0.00011999999999999999, "loss": 1.418, "step": 40 }, { "epoch": 0.19, "learning_rate": 0.00017999999999999998, "loss": 1.3622, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00023999999999999998, "loss": 1.3334, "step": 80 }, { "epoch": 0.31, "learning_rate": 0.0003, "loss": 1.3109, "step": 100 }, { "epoch": 0.38, "learning_rate": 0.00029297423887587816, "loss": 1.3046, "step": 120 }, { "epoch": 0.44, "learning_rate": 0.0002859484777517564, "loss": 1.2948, "step": 140 }, { "epoch": 0.5, "learning_rate": 0.00027892271662763465, "loss": 1.2885, "step": 160 }, { "epoch": 0.56, "learning_rate": 0.00027189695550351283, "loss": 1.2817, "step": 180 }, { "epoch": 0.63, "learning_rate": 0.0002648711943793911, "loss": 1.2706, "step": 200 }, { "epoch": 0.63, "eval_loss": 1.2975808382034302, "eval_runtime": 40.2385, "eval_samples_per_second": 49.704, "eval_steps_per_second": 0.795, "step": 200 }, { "epoch": 0.69, "learning_rate": 0.00025784543325526926, "loss": 1.2668, "step": 220 }, { "epoch": 0.75, "learning_rate": 0.00025081967213114756, "loss": 1.2699, "step": 240 }, { "epoch": 0.82, "learning_rate": 0.00024379391100702575, "loss": 1.2554, "step": 260 }, { "epoch": 0.88, "learning_rate": 0.00023676814988290396, "loss": 1.2508, "step": 280 }, { "epoch": 0.94, "learning_rate": 0.00022974238875878218, "loss": 1.2556, "step": 300 }, { "epoch": 1.0, "learning_rate": 0.00022271662763466042, "loss": 1.2497, "step": 320 }, { "epoch": 1.07, "learning_rate": 0.00021569086651053863, "loss": 1.2468, "step": 340 }, { "epoch": 1.13, "learning_rate": 0.00020866510538641685, "loss": 1.2428, "step": 360 }, { "epoch": 1.19, "learning_rate": 0.00020163934426229506, "loss": 1.2397, "step": 380 }, { "epoch": 1.25, "learning_rate": 0.0001946135831381733, "loss": 1.2474, "step": 400 }, { "epoch": 1.25, "eval_loss": 1.2675851583480835, "eval_runtime": 40.2072, "eval_samples_per_second": 49.742, "eval_steps_per_second": 0.796, "step": 400 }, { "epoch": 1.32, "learning_rate": 0.00018758782201405152, "loss": 1.2407, "step": 420 }, { "epoch": 1.38, "learning_rate": 0.00018056206088992973, "loss": 1.2348, "step": 440 }, { "epoch": 1.44, "learning_rate": 0.00017353629976580795, "loss": 1.228, "step": 460 }, { "epoch": 1.5, "learning_rate": 0.00016651053864168616, "loss": 1.233, "step": 480 }, { "epoch": 1.57, "learning_rate": 0.0001594847775175644, "loss": 1.2303, "step": 500 }, { "epoch": 1.63, "learning_rate": 0.00015245901639344262, "loss": 1.2255, "step": 520 }, { "epoch": 1.69, "learning_rate": 0.00014543325526932083, "loss": 1.2191, "step": 540 }, { "epoch": 1.76, "learning_rate": 0.00013840749414519905, "loss": 1.2211, "step": 560 }, { "epoch": 1.82, "learning_rate": 0.00013138173302107726, "loss": 1.2188, "step": 580 }, { "epoch": 1.88, "learning_rate": 0.0001243559718969555, "loss": 1.2184, "step": 600 }, { "epoch": 1.88, "eval_loss": 1.2517019510269165, "eval_runtime": 40.3057, "eval_samples_per_second": 49.621, "eval_steps_per_second": 0.794, "step": 600 }, { "epoch": 1.94, "learning_rate": 0.0001173302107728337, "loss": 1.2131, "step": 620 }, { "epoch": 2.01, "learning_rate": 0.00011030444964871193, "loss": 1.2169, "step": 640 }, { "epoch": 2.07, "learning_rate": 0.00010327868852459015, "loss": 1.2186, "step": 660 }, { "epoch": 2.13, "learning_rate": 9.625292740046838e-05, "loss": 1.2122, "step": 680 }, { "epoch": 2.19, "learning_rate": 8.922716627634659e-05, "loss": 1.2148, "step": 700 }, { "epoch": 2.26, "learning_rate": 8.220140515222482e-05, "loss": 1.2135, "step": 720 }, { "epoch": 2.32, "learning_rate": 7.517564402810303e-05, "loss": 1.2096, "step": 740 }, { "epoch": 2.38, "learning_rate": 6.814988290398126e-05, "loss": 1.2073, "step": 760 }, { "epoch": 2.45, "learning_rate": 6.112412177985948e-05, "loss": 1.2102, "step": 780 }, { "epoch": 2.51, "learning_rate": 5.40983606557377e-05, "loss": 1.2069, "step": 800 }, { "epoch": 2.51, "eval_loss": 1.2436273097991943, "eval_runtime": 40.1816, "eval_samples_per_second": 49.774, "eval_steps_per_second": 0.796, "step": 800 } ], "max_steps": 954, "num_train_epochs": 3, "total_flos": 1.1739466648829034e+19, "trial_name": null, "trial_params": null }