{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9943289224952743, "global_step": 198, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0, "loss": 1.0585, "step": 1 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.6923, "step": 8 }, { "epoch": 0.24, "learning_rate": 2e-05, "loss": 0.5149, "step": 16 }, { "epoch": 0.36, "learning_rate": 2e-05, "loss": 0.3899, "step": 24 }, { "epoch": 0.48, "learning_rate": 2e-05, "loss": 0.3511, "step": 32 }, { "epoch": 0.6, "learning_rate": 2e-05, "loss": 0.3321, "step": 40 }, { "epoch": 0.73, "learning_rate": 2e-05, "loss": 0.3254, "step": 48 }, { "epoch": 0.85, "learning_rate": 2e-05, "loss": 0.3104, "step": 56 }, { "epoch": 0.97, "learning_rate": 2e-05, "loss": 0.3007, "step": 64 }, { "epoch": 1.09, "learning_rate": 2e-05, "loss": 0.2617, "step": 72 }, { "epoch": 1.21, "learning_rate": 2e-05, "loss": 0.2454, "step": 80 }, { "epoch": 1.33, "learning_rate": 2e-05, "loss": 0.2362, "step": 88 }, { "epoch": 1.45, "learning_rate": 2e-05, "loss": 0.2334, "step": 96 }, { "epoch": 1.57, "learning_rate": 2e-05, "loss": 0.2275, "step": 104 }, { "epoch": 1.69, "learning_rate": 2e-05, "loss": 0.23, "step": 112 }, { "epoch": 1.81, "learning_rate": 2e-05, "loss": 0.217, "step": 120 }, { "epoch": 1.94, "learning_rate": 2e-05, "loss": 0.2205, "step": 128 }, { "epoch": 2.06, "learning_rate": 2e-05, "loss": 0.2031, "step": 136 }, { "epoch": 2.18, "learning_rate": 2e-05, "loss": 0.1629, "step": 144 }, { "epoch": 2.3, "learning_rate": 2e-05, "loss": 0.167, "step": 152 }, { "epoch": 2.42, "learning_rate": 2e-05, "loss": 0.1627, "step": 160 }, { "epoch": 2.54, "learning_rate": 2e-05, "loss": 0.1594, "step": 168 }, { "epoch": 2.66, "learning_rate": 2e-05, "loss": 0.1553, "step": 176 }, { "epoch": 2.78, "learning_rate": 2e-05, "loss": 0.1623, "step": 184 }, { "epoch": 2.9, "learning_rate": 2e-05, "loss": 0.1576, "step": 192 }, { "epoch": 2.99, "step": 198, "total_flos": 424189693526016.0, "train_loss": 0.2658990031540996, "train_runtime": 7827.5947, "train_samples_per_second": 3.244, "train_steps_per_second": 0.025 } ], "max_steps": 198, "num_train_epochs": 3, "total_flos": 424189693526016.0, "trial_name": null, "trial_params": null }