{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998433420365535, "eval_steps": 500, "global_step": 2871, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5221932114882507, "grad_norm": 6.875, "learning_rate": 1.856369613349391e-05, "loss": 0.7949, "step": 500 }, { "epoch": 0.5221932114882507, "eval_retrival_loss": 0.01873534359037876, "eval_retrival_runtime": 1.6428, "eval_retrival_samples_per_second": 608.716, "eval_retrival_steps_per_second": 19.479, "step": 500 }, { "epoch": 0.5221932114882507, "eval_sts_loss": 2.652181386947632, "eval_sts_runtime": 2.0073, "eval_sts_samples_per_second": 237.63, "eval_sts_steps_per_second": 7.473, "step": 500 }, { "epoch": 0.5221932114882507, "eval_reranking_loss": 0.2930990159511566, "eval_reranking_runtime": 5.9616, "eval_reranking_samples_per_second": 167.74, "eval_reranking_steps_per_second": 5.368, "step": 500 }, { "epoch": 1.0443864229765012, "grad_norm": 5.96875, "learning_rate": 1.4618836502727944e-05, "loss": 0.6813, "step": 1000 }, { "epoch": 1.0443864229765012, "eval_retrival_loss": 0.013903363607823849, "eval_retrival_runtime": 1.7481, "eval_retrival_samples_per_second": 572.037, "eval_retrival_steps_per_second": 18.305, "step": 1000 }, { "epoch": 1.0443864229765012, "eval_sts_loss": 2.5108530521392822, "eval_sts_runtime": 2.0678, "eval_sts_samples_per_second": 230.679, "eval_sts_steps_per_second": 7.254, "step": 1000 }, { "epoch": 1.0443864229765012, "eval_reranking_loss": 0.2695285677909851, "eval_reranking_runtime": 6.0139, "eval_reranking_samples_per_second": 166.28, "eval_reranking_steps_per_second": 5.321, "step": 1000 }, { "epoch": 1.566579634464752, "grad_norm": 15.875, "learning_rate": 9.32090426406817e-06, "loss": 0.5148, "step": 1500 }, { "epoch": 1.566579634464752, "eval_retrival_loss": 0.011771922931075096, "eval_retrival_runtime": 1.704, "eval_retrival_samples_per_second": 586.858, "eval_retrival_steps_per_second": 18.779, "step": 1500 }, { "epoch": 1.566579634464752, "eval_sts_loss": 2.526954412460327, "eval_sts_runtime": 2.021, "eval_sts_samples_per_second": 236.022, "eval_sts_steps_per_second": 7.422, "step": 1500 }, { "epoch": 1.566579634464752, "eval_reranking_loss": 0.28074678778648376, "eval_reranking_runtime": 5.9437, "eval_reranking_samples_per_second": 168.245, "eval_reranking_steps_per_second": 5.384, "step": 1500 }, { "epoch": 2.0887728459530024, "grad_norm": 8.625, "learning_rate": 4.221910835622651e-06, "loss": 0.48, "step": 2000 }, { "epoch": 2.0887728459530024, "eval_retrival_loss": 0.011438765563070774, "eval_retrival_runtime": 1.7039, "eval_retrival_samples_per_second": 586.897, "eval_retrival_steps_per_second": 18.781, "step": 2000 }, { "epoch": 2.0887728459530024, "eval_sts_loss": 2.541757106781006, "eval_sts_runtime": 2.0493, "eval_sts_samples_per_second": 232.762, "eval_sts_steps_per_second": 7.32, "step": 2000 }, { "epoch": 2.0887728459530024, "eval_reranking_loss": 0.27911052107810974, "eval_reranking_runtime": 5.9433, "eval_reranking_samples_per_second": 168.256, "eval_reranking_steps_per_second": 5.384, "step": 2000 }, { "epoch": 2.6109660574412534, "grad_norm": 10.625, "learning_rate": 8.155891806138993e-07, "loss": 0.3782, "step": 2500 }, { "epoch": 2.6109660574412534, "eval_retrival_loss": 0.01174311526119709, "eval_retrival_runtime": 1.6915, "eval_retrival_samples_per_second": 591.189, "eval_retrival_steps_per_second": 18.918, "step": 2500 }, { "epoch": 2.6109660574412534, "eval_sts_loss": 2.573981285095215, "eval_sts_runtime": 2.0103, "eval_sts_samples_per_second": 237.277, "eval_sts_steps_per_second": 7.462, "step": 2500 }, { "epoch": 2.6109660574412534, "eval_reranking_loss": 0.2787380516529083, "eval_reranking_runtime": 5.9757, "eval_reranking_samples_per_second": 167.345, "eval_reranking_steps_per_second": 5.355, "step": 2500 } ], "logging_steps": 500, "max_steps": 2871, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }