NewModel7NonFormatted / trainer_state.json
MartaTT's picture
Upload 7 files
377350b verified
{
"best_metric": 0.8465077877044678,
"best_model_checkpoint": "outputs/checkpoint-92",
"epoch": 1.0,
"eval_steps": 23,
"global_step": 92,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010869565217391304,
"grad_norm": 0.23393695056438446,
"learning_rate": 0.001,
"loss": 2.6154,
"step": 1
},
{
"epoch": 0.021739130434782608,
"grad_norm": 0.4464223086833954,
"learning_rate": 0.001,
"loss": 2.3194,
"step": 2
},
{
"epoch": 0.03260869565217391,
"grad_norm": 1.6278090476989746,
"learning_rate": 0.001,
"loss": 2.1998,
"step": 3
},
{
"epoch": 0.043478260869565216,
"grad_norm": 0.32962527871131897,
"learning_rate": 0.001,
"loss": 1.9167,
"step": 4
},
{
"epoch": 0.05434782608695652,
"grad_norm": 0.4716239273548126,
"learning_rate": 0.001,
"loss": 1.7546,
"step": 5
},
{
"epoch": 0.06521739130434782,
"grad_norm": 0.38779595494270325,
"learning_rate": 0.001,
"loss": 1.6069,
"step": 6
},
{
"epoch": 0.07608695652173914,
"grad_norm": 0.3187870681285858,
"learning_rate": 0.001,
"loss": 1.5629,
"step": 7
},
{
"epoch": 0.08695652173913043,
"grad_norm": 0.27891236543655396,
"learning_rate": 0.001,
"loss": 1.4564,
"step": 8
},
{
"epoch": 0.09782608695652174,
"grad_norm": 0.2857877314090729,
"learning_rate": 0.001,
"loss": 1.4014,
"step": 9
},
{
"epoch": 0.10869565217391304,
"grad_norm": 0.2176678329706192,
"learning_rate": 0.001,
"loss": 1.3423,
"step": 10
},
{
"epoch": 0.11956521739130435,
"grad_norm": 0.27025488018989563,
"learning_rate": 0.001,
"loss": 1.3101,
"step": 11
},
{
"epoch": 0.13043478260869565,
"grad_norm": 0.2788834571838379,
"learning_rate": 0.001,
"loss": 1.2676,
"step": 12
},
{
"epoch": 0.14130434782608695,
"grad_norm": 0.24594657123088837,
"learning_rate": 0.001,
"loss": 1.3237,
"step": 13
},
{
"epoch": 0.15217391304347827,
"grad_norm": 0.2215934842824936,
"learning_rate": 0.001,
"loss": 1.2517,
"step": 14
},
{
"epoch": 0.16304347826086957,
"grad_norm": 0.1776692122220993,
"learning_rate": 0.001,
"loss": 1.2289,
"step": 15
},
{
"epoch": 0.17391304347826086,
"grad_norm": 0.18647059798240662,
"learning_rate": 0.001,
"loss": 1.2342,
"step": 16
},
{
"epoch": 0.18478260869565216,
"grad_norm": 0.2081366926431656,
"learning_rate": 0.001,
"loss": 1.2139,
"step": 17
},
{
"epoch": 0.1956521739130435,
"grad_norm": 0.18122343719005585,
"learning_rate": 0.001,
"loss": 1.1598,
"step": 18
},
{
"epoch": 0.20652173913043478,
"grad_norm": 0.19013595581054688,
"learning_rate": 0.001,
"loss": 1.2007,
"step": 19
},
{
"epoch": 0.21739130434782608,
"grad_norm": 0.17870813608169556,
"learning_rate": 0.001,
"loss": 1.1708,
"step": 20
},
{
"epoch": 0.22826086956521738,
"grad_norm": 0.19605986773967743,
"learning_rate": 0.001,
"loss": 1.0883,
"step": 21
},
{
"epoch": 0.2391304347826087,
"grad_norm": 0.1825353056192398,
"learning_rate": 0.001,
"loss": 1.1113,
"step": 22
},
{
"epoch": 0.25,
"grad_norm": 0.20768754184246063,
"learning_rate": 0.001,
"loss": 1.1057,
"step": 23
},
{
"epoch": 0.25,
"eval_loss": 1.0819175243377686,
"eval_runtime": 39.6272,
"eval_samples_per_second": 29.55,
"eval_steps_per_second": 3.71,
"step": 23
},
{
"epoch": 0.2608695652173913,
"grad_norm": 0.24947208166122437,
"learning_rate": 0.001,
"loss": 1.0427,
"step": 24
},
{
"epoch": 0.2717391304347826,
"grad_norm": 0.41416075825691223,
"learning_rate": 0.001,
"loss": 1.0768,
"step": 25
},
{
"epoch": 0.2826086956521739,
"grad_norm": 3.9600319862365723,
"learning_rate": 0.001,
"loss": 1.069,
"step": 26
},
{
"epoch": 0.29347826086956524,
"grad_norm": 0.8276275992393494,
"learning_rate": 0.001,
"loss": 1.1223,
"step": 27
},
{
"epoch": 0.30434782608695654,
"grad_norm": 0.23127484321594238,
"learning_rate": 0.001,
"loss": 1.0725,
"step": 28
},
{
"epoch": 0.31521739130434784,
"grad_norm": 0.22777508199214935,
"learning_rate": 0.001,
"loss": 1.1025,
"step": 29
},
{
"epoch": 0.32608695652173914,
"grad_norm": 0.23111239075660706,
"learning_rate": 0.001,
"loss": 1.0722,
"step": 30
},
{
"epoch": 0.33695652173913043,
"grad_norm": 0.24532677233219147,
"learning_rate": 0.001,
"loss": 1.1259,
"step": 31
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.7043253183364868,
"learning_rate": 0.001,
"loss": 1.0502,
"step": 32
},
{
"epoch": 0.358695652173913,
"grad_norm": 0.22311310470104218,
"learning_rate": 0.001,
"loss": 0.9918,
"step": 33
},
{
"epoch": 0.3695652173913043,
"grad_norm": 0.2729714810848236,
"learning_rate": 0.001,
"loss": 1.0666,
"step": 34
},
{
"epoch": 0.3804347826086957,
"grad_norm": 0.1948792189359665,
"learning_rate": 0.001,
"loss": 0.9834,
"step": 35
},
{
"epoch": 0.391304347826087,
"grad_norm": 0.17322953045368195,
"learning_rate": 0.001,
"loss": 0.9522,
"step": 36
},
{
"epoch": 0.40217391304347827,
"grad_norm": 0.15274113416671753,
"learning_rate": 0.001,
"loss": 0.9644,
"step": 37
},
{
"epoch": 0.41304347826086957,
"grad_norm": 0.18090985715389252,
"learning_rate": 0.001,
"loss": 0.991,
"step": 38
},
{
"epoch": 0.42391304347826086,
"grad_norm": 0.1719120740890503,
"learning_rate": 0.001,
"loss": 0.9659,
"step": 39
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.17336900532245636,
"learning_rate": 0.001,
"loss": 1.0173,
"step": 40
},
{
"epoch": 0.44565217391304346,
"grad_norm": 0.16952969133853912,
"learning_rate": 0.001,
"loss": 0.9946,
"step": 41
},
{
"epoch": 0.45652173913043476,
"grad_norm": 0.1436886191368103,
"learning_rate": 0.001,
"loss": 0.9536,
"step": 42
},
{
"epoch": 0.4673913043478261,
"grad_norm": 0.16588632762432098,
"learning_rate": 0.001,
"loss": 0.9723,
"step": 43
},
{
"epoch": 0.4782608695652174,
"grad_norm": 0.1483495533466339,
"learning_rate": 0.001,
"loss": 0.9297,
"step": 44
},
{
"epoch": 0.4891304347826087,
"grad_norm": 0.1516825258731842,
"learning_rate": 0.001,
"loss": 0.9911,
"step": 45
},
{
"epoch": 0.5,
"grad_norm": 0.16181206703186035,
"learning_rate": 0.001,
"loss": 0.9688,
"step": 46
},
{
"epoch": 0.5,
"eval_loss": 0.9558140635490417,
"eval_runtime": 39.5819,
"eval_samples_per_second": 29.584,
"eval_steps_per_second": 3.714,
"step": 46
},
{
"epoch": 0.5108695652173914,
"grad_norm": 0.14223167300224304,
"learning_rate": 0.001,
"loss": 0.8418,
"step": 47
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.14156702160835266,
"learning_rate": 0.001,
"loss": 0.868,
"step": 48
},
{
"epoch": 0.532608695652174,
"grad_norm": 0.1668156236410141,
"learning_rate": 0.001,
"loss": 0.9643,
"step": 49
},
{
"epoch": 0.5434782608695652,
"grad_norm": 0.14512571692466736,
"learning_rate": 0.001,
"loss": 0.9395,
"step": 50
},
{
"epoch": 0.5543478260869565,
"grad_norm": 0.1591370701789856,
"learning_rate": 0.001,
"loss": 0.9467,
"step": 51
},
{
"epoch": 0.5652173913043478,
"grad_norm": 0.15379436314105988,
"learning_rate": 0.001,
"loss": 0.984,
"step": 52
},
{
"epoch": 0.5760869565217391,
"grad_norm": 0.15107646584510803,
"learning_rate": 0.001,
"loss": 0.924,
"step": 53
},
{
"epoch": 0.5869565217391305,
"grad_norm": 0.14596650004386902,
"learning_rate": 0.001,
"loss": 0.933,
"step": 54
},
{
"epoch": 0.5978260869565217,
"grad_norm": 0.13844196498394012,
"learning_rate": 0.001,
"loss": 0.8861,
"step": 55
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.1443144679069519,
"learning_rate": 0.001,
"loss": 0.9239,
"step": 56
},
{
"epoch": 0.6195652173913043,
"grad_norm": 0.1431627869606018,
"learning_rate": 0.001,
"loss": 0.9399,
"step": 57
},
{
"epoch": 0.6304347826086957,
"grad_norm": 0.13777939975261688,
"learning_rate": 0.001,
"loss": 0.8712,
"step": 58
},
{
"epoch": 0.6413043478260869,
"grad_norm": 0.14304274320602417,
"learning_rate": 0.001,
"loss": 0.977,
"step": 59
},
{
"epoch": 0.6521739130434783,
"grad_norm": 0.13079452514648438,
"learning_rate": 0.001,
"loss": 0.8102,
"step": 60
},
{
"epoch": 0.6630434782608695,
"grad_norm": 0.13848628103733063,
"learning_rate": 0.001,
"loss": 0.8854,
"step": 61
},
{
"epoch": 0.6739130434782609,
"grad_norm": 0.13032200932502747,
"learning_rate": 0.001,
"loss": 0.9307,
"step": 62
},
{
"epoch": 0.6847826086956522,
"grad_norm": 0.13883714377880096,
"learning_rate": 0.001,
"loss": 0.9464,
"step": 63
},
{
"epoch": 0.6956521739130435,
"grad_norm": 0.12983089685440063,
"learning_rate": 0.001,
"loss": 0.9065,
"step": 64
},
{
"epoch": 0.7065217391304348,
"grad_norm": 0.12657804787158966,
"learning_rate": 0.001,
"loss": 0.8866,
"step": 65
},
{
"epoch": 0.717391304347826,
"grad_norm": 0.14905163645744324,
"learning_rate": 0.001,
"loss": 0.9583,
"step": 66
},
{
"epoch": 0.7282608695652174,
"grad_norm": 0.1388324350118637,
"learning_rate": 0.001,
"loss": 0.7862,
"step": 67
},
{
"epoch": 0.7391304347826086,
"grad_norm": 0.13726989924907684,
"learning_rate": 0.001,
"loss": 0.8841,
"step": 68
},
{
"epoch": 0.75,
"grad_norm": 0.14547140896320343,
"learning_rate": 0.001,
"loss": 0.9234,
"step": 69
},
{
"epoch": 0.75,
"eval_loss": 0.8918075561523438,
"eval_runtime": 39.653,
"eval_samples_per_second": 29.531,
"eval_steps_per_second": 3.707,
"step": 69
},
{
"epoch": 0.7608695652173914,
"grad_norm": 0.12735989689826965,
"learning_rate": 0.001,
"loss": 0.8529,
"step": 70
},
{
"epoch": 0.7717391304347826,
"grad_norm": 0.12790235877037048,
"learning_rate": 0.001,
"loss": 0.8211,
"step": 71
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.14512644708156586,
"learning_rate": 0.001,
"loss": 0.9544,
"step": 72
},
{
"epoch": 0.7934782608695652,
"grad_norm": 0.1390400230884552,
"learning_rate": 0.001,
"loss": 0.8758,
"step": 73
},
{
"epoch": 0.8043478260869565,
"grad_norm": 0.12742573022842407,
"learning_rate": 0.001,
"loss": 0.8937,
"step": 74
},
{
"epoch": 0.8152173913043478,
"grad_norm": 0.1397847682237625,
"learning_rate": 0.001,
"loss": 0.9319,
"step": 75
},
{
"epoch": 0.8260869565217391,
"grad_norm": 0.13563187420368195,
"learning_rate": 0.001,
"loss": 0.9594,
"step": 76
},
{
"epoch": 0.8369565217391305,
"grad_norm": 0.13903594017028809,
"learning_rate": 0.001,
"loss": 0.8688,
"step": 77
},
{
"epoch": 0.8478260869565217,
"grad_norm": 0.12712402641773224,
"learning_rate": 0.001,
"loss": 0.8964,
"step": 78
},
{
"epoch": 0.8586956521739131,
"grad_norm": 0.131251260638237,
"learning_rate": 0.001,
"loss": 0.858,
"step": 79
},
{
"epoch": 0.8695652173913043,
"grad_norm": 0.1342822164297104,
"learning_rate": 0.001,
"loss": 0.8941,
"step": 80
},
{
"epoch": 0.8804347826086957,
"grad_norm": 0.1401877999305725,
"learning_rate": 0.001,
"loss": 0.8575,
"step": 81
},
{
"epoch": 0.8913043478260869,
"grad_norm": 0.13211235404014587,
"learning_rate": 0.001,
"loss": 0.8475,
"step": 82
},
{
"epoch": 0.9021739130434783,
"grad_norm": 0.13683001697063446,
"learning_rate": 0.001,
"loss": 0.8534,
"step": 83
},
{
"epoch": 0.9130434782608695,
"grad_norm": 0.13229840993881226,
"learning_rate": 0.001,
"loss": 0.8567,
"step": 84
},
{
"epoch": 0.9239130434782609,
"grad_norm": 0.12229685485363007,
"learning_rate": 0.001,
"loss": 0.7974,
"step": 85
},
{
"epoch": 0.9347826086956522,
"grad_norm": 0.14769427478313446,
"learning_rate": 0.001,
"loss": 0.8962,
"step": 86
},
{
"epoch": 0.9456521739130435,
"grad_norm": 0.12334488332271576,
"learning_rate": 0.001,
"loss": 0.817,
"step": 87
},
{
"epoch": 0.9565217391304348,
"grad_norm": 0.11790772527456284,
"learning_rate": 0.001,
"loss": 0.8305,
"step": 88
},
{
"epoch": 0.967391304347826,
"grad_norm": 0.15138116478919983,
"learning_rate": 0.001,
"loss": 0.8873,
"step": 89
},
{
"epoch": 0.9782608695652174,
"grad_norm": 0.14076858758926392,
"learning_rate": 0.001,
"loss": 0.841,
"step": 90
},
{
"epoch": 0.9891304347826086,
"grad_norm": 0.15599556267261505,
"learning_rate": 0.001,
"loss": 0.9173,
"step": 91
},
{
"epoch": 1.0,
"grad_norm": 0.19630737602710724,
"learning_rate": 0.001,
"loss": 0.8713,
"step": 92
},
{
"epoch": 1.0,
"eval_loss": 0.8465077877044678,
"eval_runtime": 39.5565,
"eval_samples_per_second": 29.603,
"eval_steps_per_second": 3.716,
"step": 92
},
{
"epoch": 1.0,
"step": 92,
"total_flos": 2.3790959179333632e+17,
"train_loss": 1.0686528734538867,
"train_runtime": 1550.4974,
"train_samples_per_second": 7.55,
"train_steps_per_second": 0.059
}
],
"logging_steps": 1,
"max_steps": 92,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 23,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3790959179333632e+17,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}