{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 621, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04830917874396135, "grad_norm": 37.65997314453125, "learning_rate": 4.0000000000000003e-07, "loss": 0.8115, "step": 10 }, { "epoch": 0.0966183574879227, "grad_norm": 49.085182189941406, "learning_rate": 8.000000000000001e-07, "loss": 0.7151, "step": 20 }, { "epoch": 0.14492753623188406, "grad_norm": 57.3139533996582, "learning_rate": 1.2000000000000002e-06, "loss": 0.5399, "step": 30 }, { "epoch": 0.1932367149758454, "grad_norm": 14.93040943145752, "learning_rate": 1.6000000000000001e-06, "loss": 0.4486, "step": 40 }, { "epoch": 0.24154589371980675, "grad_norm": 40.2352409362793, "learning_rate": 2.0000000000000003e-06, "loss": 0.2895, "step": 50 }, { "epoch": 0.2898550724637681, "grad_norm": 22.021743774414062, "learning_rate": 2.4000000000000003e-06, "loss": 0.208, "step": 60 }, { "epoch": 0.33816425120772947, "grad_norm": 3.9624946117401123, "learning_rate": 2.8000000000000003e-06, "loss": 0.158, "step": 70 }, { "epoch": 0.3864734299516908, "grad_norm": 3.648684501647949, "learning_rate": 3.2000000000000003e-06, "loss": 0.1099, "step": 80 }, { "epoch": 0.43478260869565216, "grad_norm": 1.9831331968307495, "learning_rate": 3.6000000000000003e-06, "loss": 0.0898, "step": 90 }, { "epoch": 0.4830917874396135, "grad_norm": 1.1971267461776733, "learning_rate": 4.000000000000001e-06, "loss": 0.0561, "step": 100 }, { "epoch": 0.4830917874396135, "eval_accuracy": 0.9926650366748166, "eval_accuracy_label_Clickbait": 0.9933110367892977, "eval_accuracy_label_Factual": 0.9922928709055877, "eval_f1": 0.9926701815332624, "eval_loss": 0.04882814362645149, "eval_precision": 0.9926880698400764, "eval_recall": 0.9926650366748166, "eval_runtime": 0.8226, "eval_samples_per_second": 994.464, "eval_steps_per_second": 63.218, "step": 100 }, { "epoch": 0.5314009661835749, "grad_norm": 0.8438642621040344, "learning_rate": 4.4e-06, "loss": 0.061, "step": 110 }, { "epoch": 0.5797101449275363, "grad_norm": 0.5306077003479004, "learning_rate": 4.800000000000001e-06, "loss": 0.0523, "step": 120 }, { "epoch": 0.6280193236714976, "grad_norm": 42.116844177246094, "learning_rate": 5.2e-06, "loss": 0.0555, "step": 130 }, { "epoch": 0.6763285024154589, "grad_norm": 0.45348191261291504, "learning_rate": 5.600000000000001e-06, "loss": 0.0511, "step": 140 }, { "epoch": 0.7246376811594203, "grad_norm": 0.273034930229187, "learning_rate": 6e-06, "loss": 0.0461, "step": 150 }, { "epoch": 0.7729468599033816, "grad_norm": 0.32376888394355774, "learning_rate": 6.4000000000000006e-06, "loss": 0.0355, "step": 160 }, { "epoch": 0.821256038647343, "grad_norm": 0.46599268913269043, "learning_rate": 6.800000000000001e-06, "loss": 0.0057, "step": 170 }, { "epoch": 0.8695652173913043, "grad_norm": 76.78250122070312, "learning_rate": 7.2000000000000005e-06, "loss": 0.0284, "step": 180 }, { "epoch": 0.9178743961352657, "grad_norm": 0.27929720282554626, "learning_rate": 7.600000000000001e-06, "loss": 0.0154, "step": 190 }, { "epoch": 0.966183574879227, "grad_norm": 0.09468149393796921, "learning_rate": 8.000000000000001e-06, "loss": 0.0037, "step": 200 }, { "epoch": 0.966183574879227, "eval_accuracy": 0.9987775061124694, "eval_accuracy_label_Clickbait": 0.9966555183946488, "eval_accuracy_label_Factual": 1.0, "eval_f1": 0.998777070551364, "eval_loss": 0.00973883830010891, "eval_precision": 0.9987798570622531, "eval_recall": 0.9987775061124694, "eval_runtime": 0.8161, "eval_samples_per_second": 1002.369, "eval_steps_per_second": 63.72, "step": 200 }, { "epoch": 1.0144927536231885, "grad_norm": 11.390016555786133, "learning_rate": 8.400000000000001e-06, "loss": 0.033, "step": 210 }, { "epoch": 1.0628019323671498, "grad_norm": 0.6215488910675049, "learning_rate": 8.8e-06, "loss": 0.0279, "step": 220 }, { "epoch": 1.1111111111111112, "grad_norm": 0.1523633599281311, "learning_rate": 9.200000000000002e-06, "loss": 0.0093, "step": 230 }, { "epoch": 1.1594202898550725, "grad_norm": 0.10952762514352798, "learning_rate": 9.600000000000001e-06, "loss": 0.022, "step": 240 }, { "epoch": 1.2077294685990339, "grad_norm": 0.07856310158967972, "learning_rate": 1e-05, "loss": 0.0309, "step": 250 }, { "epoch": 1.2560386473429952, "grad_norm": 0.05758531391620636, "learning_rate": 1.04e-05, "loss": 0.0015, "step": 260 }, { "epoch": 1.3043478260869565, "grad_norm": 0.049695733934640884, "learning_rate": 1.0800000000000002e-05, "loss": 0.0071, "step": 270 }, { "epoch": 1.3526570048309179, "grad_norm": 0.19512628018856049, "learning_rate": 1.1200000000000001e-05, "loss": 0.0054, "step": 280 }, { "epoch": 1.4009661835748792, "grad_norm": 0.049039632081985474, "learning_rate": 1.16e-05, "loss": 0.023, "step": 290 }, { "epoch": 1.4492753623188406, "grad_norm": 0.06413820385932922, "learning_rate": 1.2e-05, "loss": 0.0012, "step": 300 }, { "epoch": 1.4492753623188406, "eval_accuracy": 1.0, "eval_accuracy_label_Clickbait": 1.0, "eval_accuracy_label_Factual": 1.0, "eval_f1": 1.0, "eval_loss": 0.0015956248389557004, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 0.8233, "eval_samples_per_second": 993.556, "eval_steps_per_second": 63.16, "step": 300 }, { "epoch": 1.497584541062802, "grad_norm": 0.04210774227976799, "learning_rate": 1.2400000000000002e-05, "loss": 0.0012, "step": 310 }, { "epoch": 1.5458937198067633, "grad_norm": 0.02976871468126774, "learning_rate": 1.2800000000000001e-05, "loss": 0.0079, "step": 320 }, { "epoch": 1.5942028985507246, "grad_norm": 0.029957927763462067, "learning_rate": 1.3200000000000002e-05, "loss": 0.0008, "step": 330 }, { "epoch": 1.642512077294686, "grad_norm": 12.84114933013916, "learning_rate": 1.3600000000000002e-05, "loss": 0.0168, "step": 340 }, { "epoch": 1.6908212560386473, "grad_norm": 0.6662724018096924, "learning_rate": 1.4e-05, "loss": 0.0209, "step": 350 }, { "epoch": 1.7391304347826086, "grad_norm": 0.036532897502183914, "learning_rate": 1.4400000000000001e-05, "loss": 0.0008, "step": 360 }, { "epoch": 1.78743961352657, "grad_norm": 0.05894944816827774, "learning_rate": 1.48e-05, "loss": 0.0351, "step": 370 }, { "epoch": 1.8357487922705316, "grad_norm": 0.03172897920012474, "learning_rate": 1.5200000000000002e-05, "loss": 0.0156, "step": 380 }, { "epoch": 1.8840579710144927, "grad_norm": 60.220420837402344, "learning_rate": 1.5600000000000003e-05, "loss": 0.106, "step": 390 }, { "epoch": 1.9323671497584543, "grad_norm": 0.045578889548778534, "learning_rate": 1.6000000000000003e-05, "loss": 0.0012, "step": 400 }, { "epoch": 1.9323671497584543, "eval_accuracy": 1.0, "eval_accuracy_label_Clickbait": 1.0, "eval_accuracy_label_Factual": 1.0, "eval_f1": 1.0, "eval_loss": 0.001607110258191824, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 0.822, "eval_samples_per_second": 995.143, "eval_steps_per_second": 63.261, "step": 400 }, { "epoch": 1.9806763285024154, "grad_norm": 0.038461122661828995, "learning_rate": 1.64e-05, "loss": 0.0011, "step": 410 }, { "epoch": 2.028985507246377, "grad_norm": 0.024971311911940575, "learning_rate": 1.6800000000000002e-05, "loss": 0.0008, "step": 420 }, { "epoch": 2.077294685990338, "grad_norm": 0.021732186898589134, "learning_rate": 1.72e-05, "loss": 0.0005, "step": 430 }, { "epoch": 2.1256038647342996, "grad_norm": 22.902217864990234, "learning_rate": 1.76e-05, "loss": 0.0134, "step": 440 }, { "epoch": 2.1739130434782608, "grad_norm": 0.05803954228758812, "learning_rate": 1.8e-05, "loss": 0.0005, "step": 450 }, { "epoch": 2.2222222222222223, "grad_norm": 0.016587387770414352, "learning_rate": 1.8400000000000003e-05, "loss": 0.0004, "step": 460 }, { "epoch": 2.2705314009661834, "grad_norm": 0.014241261407732964, "learning_rate": 1.88e-05, "loss": 0.0004, "step": 470 }, { "epoch": 2.318840579710145, "grad_norm": 0.013285420835018158, "learning_rate": 1.9200000000000003e-05, "loss": 0.0003, "step": 480 }, { "epoch": 2.367149758454106, "grad_norm": 0.008689775131642818, "learning_rate": 1.9600000000000002e-05, "loss": 0.0003, "step": 490 }, { "epoch": 2.4154589371980677, "grad_norm": 0.05173454433679581, "learning_rate": 2e-05, "loss": 0.0433, "step": 500 }, { "epoch": 2.4154589371980677, "eval_accuracy": 0.9987775061124694, "eval_accuracy_label_Clickbait": 0.9966555183946488, "eval_accuracy_label_Factual": 1.0, "eval_f1": 0.998777070551364, "eval_loss": 0.0020217353012412786, "eval_precision": 0.9987798570622531, "eval_recall": 0.9987775061124694, "eval_runtime": 0.8279, "eval_samples_per_second": 988.004, "eval_steps_per_second": 62.807, "step": 500 }, { "epoch": 2.463768115942029, "grad_norm": 0.07507430762052536, "learning_rate": 1.834710743801653e-05, "loss": 0.0055, "step": 510 }, { "epoch": 2.5120772946859904, "grad_norm": 42.797401428222656, "learning_rate": 1.669421487603306e-05, "loss": 0.0161, "step": 520 }, { "epoch": 2.5603864734299515, "grad_norm": 0.01774718426167965, "learning_rate": 1.504132231404959e-05, "loss": 0.0006, "step": 530 }, { "epoch": 2.608695652173913, "grad_norm": 0.022867949679493904, "learning_rate": 1.3388429752066117e-05, "loss": 0.0191, "step": 540 }, { "epoch": 2.6570048309178746, "grad_norm": 0.013622589409351349, "learning_rate": 1.1735537190082646e-05, "loss": 0.0043, "step": 550 }, { "epoch": 2.7053140096618358, "grad_norm": 0.012125013396143913, "learning_rate": 1.0082644628099174e-05, "loss": 0.0003, "step": 560 }, { "epoch": 2.753623188405797, "grad_norm": 0.009969482198357582, "learning_rate": 8.429752066115703e-06, "loss": 0.0193, "step": 570 }, { "epoch": 2.8019323671497585, "grad_norm": 0.010625869035720825, "learning_rate": 6.776859504132232e-06, "loss": 0.0008, "step": 580 }, { "epoch": 2.85024154589372, "grad_norm": 0.010633111000061035, "learning_rate": 5.12396694214876e-06, "loss": 0.0143, "step": 590 }, { "epoch": 2.898550724637681, "grad_norm": 0.011428612284362316, "learning_rate": 3.4710743801652895e-06, "loss": 0.0003, "step": 600 }, { "epoch": 2.898550724637681, "eval_accuracy": 0.9951100244498777, "eval_accuracy_label_Clickbait": 0.9866220735785953, "eval_accuracy_label_Factual": 1.0, "eval_f1": 0.9951029456353522, "eval_loss": 0.016679394990205765, "eval_precision": 0.9951474238804714, "eval_recall": 0.9951100244498777, "eval_runtime": 0.8314, "eval_samples_per_second": 983.91, "eval_steps_per_second": 62.547, "step": 600 }, { "epoch": 2.9468599033816423, "grad_norm": 0.010514287278056145, "learning_rate": 1.8181818181818183e-06, "loss": 0.0003, "step": 610 }, { "epoch": 2.995169082125604, "grad_norm": 104.85121154785156, "learning_rate": 1.6528925619834713e-07, "loss": 0.0108, "step": 620 }, { "epoch": 3.0, "step": 621, "total_flos": 29633646182400.0, "train_loss": 0.06926822083632517, "train_runtime": 68.411, "train_samples_per_second": 290.041, "train_steps_per_second": 9.077 }, { "epoch": 3.0, "eval_accuracy": 0.9951100244498777, "eval_accuracy_label_Clickbait": 0.9866220735785953, "eval_accuracy_label_Factual": 1.0, "eval_f1": 0.9951029456353522, "eval_loss": 0.017279641702771187, "eval_precision": 0.9951474238804714, "eval_recall": 0.9951100244498777, "eval_runtime": 0.8191, "eval_samples_per_second": 998.621, "eval_steps_per_second": 63.482, "step": 621 } ], "logging_steps": 10, "max_steps": 621, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 29633646182400.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }