{
  "best_metric": 5.793323516845703,
  "best_model_checkpoint": "turkish-embedding-model/checkpoint-1564",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 3128,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0319693094629156,
      "grad_norm": 70.72708129882812,
      "learning_rate": 1.1253196930946293e-06,
      "loss": 17.17,
      "step": 25
    },
    {
      "epoch": 0.0639386189258312,
      "grad_norm": 81.68770599365234,
      "learning_rate": 2.4040920716112534e-06,
      "loss": 16.4932,
      "step": 50
    },
    {
      "epoch": 0.0959079283887468,
      "grad_norm": 109.91338348388672,
      "learning_rate": 3.6828644501278778e-06,
      "loss": 16.5976,
      "step": 75
    },
    {
      "epoch": 0.1278772378516624,
      "grad_norm": 73.892578125,
      "learning_rate": 4.961636828644502e-06,
      "loss": 15.6991,
      "step": 100
    },
    {
      "epoch": 0.159846547314578,
      "grad_norm": 79.35150909423828,
      "learning_rate": 6.240409207161126e-06,
      "loss": 14.876,
      "step": 125
    },
    {
      "epoch": 0.1918158567774936,
      "grad_norm": 83.0904541015625,
      "learning_rate": 7.5191815856777495e-06,
      "loss": 14.4828,
      "step": 150
    },
    {
      "epoch": 0.2237851662404092,
      "grad_norm": 76.82855987548828,
      "learning_rate": 8.797953964194374e-06,
      "loss": 12.7061,
      "step": 175
    },
    {
      "epoch": 0.2557544757033248,
      "grad_norm": 51.30181121826172,
      "learning_rate": 1.0076726342710998e-05,
      "loss": 10.8687,
      "step": 200
    },
    {
      "epoch": 0.2877237851662404,
      "grad_norm": 18.70808219909668,
      "learning_rate": 1.1355498721227622e-05,
      "loss": 8.3797,
      "step": 225
    },
    {
      "epoch": 0.319693094629156,
      "grad_norm": 1.3039417266845703,
      "learning_rate": 1.2634271099744246e-05,
      "loss": 6.2029,
      "step": 250
    },
    {
      "epoch": 0.3516624040920716,
      "grad_norm": 0.2324853092432022,
      "learning_rate": 1.391304347826087e-05,
      "loss": 5.8228,
      "step": 275
    },
    {
      "epoch": 0.3836317135549872,
      "grad_norm": 0.1757364720106125,
      "learning_rate": 1.5191815856777494e-05,
      "loss": 5.811,
      "step": 300
    },
    {
      "epoch": 0.4156010230179028,
      "grad_norm": 0.1788654774427414,
      "learning_rate": 1.647058823529412e-05,
      "loss": 5.8079,
      "step": 325
    },
    {
      "epoch": 0.4475703324808184,
      "grad_norm": 0.12862567603588104,
      "learning_rate": 1.7749360613810744e-05,
      "loss": 5.8077,
      "step": 350
    },
    {
      "epoch": 0.479539641943734,
      "grad_norm": 0.14497514069080353,
      "learning_rate": 1.9028132992327367e-05,
      "loss": 5.8035,
      "step": 375
    },
    {
      "epoch": 0.5115089514066496,
      "grad_norm": 0.1350390762090683,
      "learning_rate": 1.996589940323956e-05,
      "loss": 5.8072,
      "step": 400
    },
    {
      "epoch": 0.5434782608695652,
      "grad_norm": 0.1435602754354477,
      "learning_rate": 1.9823813583404378e-05,
      "loss": 5.8033,
      "step": 425
    },
    {
      "epoch": 0.5754475703324808,
      "grad_norm": 0.11389254033565521,
      "learning_rate": 1.96817277635692e-05,
      "loss": 5.8086,
      "step": 450
    },
    {
      "epoch": 0.6074168797953964,
      "grad_norm": 0.15821650624275208,
      "learning_rate": 1.9539641943734017e-05,
      "loss": 5.81,
      "step": 475
    },
    {
      "epoch": 0.639386189258312,
      "grad_norm": 0.1179889366030693,
      "learning_rate": 1.9397556123898838e-05,
      "loss": 5.7949,
      "step": 500
    },
    {
      "epoch": 0.6713554987212276,
      "grad_norm": 0.10912967473268509,
      "learning_rate": 1.9255470304063656e-05,
      "loss": 5.8079,
      "step": 525
    },
    {
      "epoch": 0.7033248081841432,
      "grad_norm": 0.11702870577573776,
      "learning_rate": 1.9113384484228477e-05,
      "loss": 5.8057,
      "step": 550
    },
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 0.13132448494434357,
      "learning_rate": 1.8971298664393295e-05,
      "loss": 5.8097,
      "step": 575
    },
    {
      "epoch": 0.7672634271099744,
      "grad_norm": 0.15833145380020142,
      "learning_rate": 1.8829212844558116e-05,
      "loss": 5.7986,
      "step": 600
    },
    {
      "epoch": 0.7992327365728901,
      "grad_norm": 0.11651863902807236,
      "learning_rate": 1.8687127024722937e-05,
      "loss": 5.8051,
      "step": 625
    },
    {
      "epoch": 0.8312020460358056,
      "grad_norm": 0.5393890142440796,
      "learning_rate": 1.854504120488775e-05,
      "loss": 5.8041,
      "step": 650
    },
    {
      "epoch": 0.8631713554987213,
      "grad_norm": 0.6457561254501343,
      "learning_rate": 1.8402955385052572e-05,
      "loss": 5.7907,
      "step": 675
    },
    {
      "epoch": 0.8951406649616368,
      "grad_norm": 0.5643135905265808,
      "learning_rate": 1.8260869565217393e-05,
      "loss": 5.7991,
      "step": 700
    },
    {
      "epoch": 0.9271099744245525,
      "grad_norm": 3.214787721633911,
      "learning_rate": 1.811878374538221e-05,
      "loss": 5.8035,
      "step": 725
    },
    {
      "epoch": 0.959079283887468,
      "grad_norm": 2.781162977218628,
      "learning_rate": 1.7976697925547032e-05,
      "loss": 5.7945,
      "step": 750
    },
    {
      "epoch": 0.9910485933503836,
      "grad_norm": 0.38559335470199585,
      "learning_rate": 1.783461210571185e-05,
      "loss": 5.8077,
      "step": 775
    },
    {
      "epoch": 1.0,
      "eval_loss": 5.8023600578308105,
      "eval_runtime": 18.0632,
      "eval_samples_per_second": 276.805,
      "eval_steps_per_second": 4.374,
      "eval_tr_ling_pearson_cosine": 0.017751548525136808,
      "eval_tr_ling_pearson_dot": 0.025703597820631346,
      "eval_tr_ling_pearson_euclidean": 0.02195284877201089,
      "eval_tr_ling_pearson_manhattan": 0.02083376479528459,
      "eval_tr_ling_pearson_max": 0.025703597820631346,
      "eval_tr_ling_spearman_cosine": 0.027108099994157316,
      "eval_tr_ling_spearman_dot": 0.03304394653738539,
      "eval_tr_ling_spearman_euclidean": 0.025485959636772793,
      "eval_tr_ling_spearman_manhattan": 0.024466610177699702,
      "eval_tr_ling_spearman_max": 0.03304394653738539,
      "step": 782
    },
    {
      "epoch": 1.0230179028132993,
      "grad_norm": 0.3645063638687134,
      "learning_rate": 1.769252628587667e-05,
      "loss": 5.6703,
      "step": 800
    },
    {
      "epoch": 1.054987212276215,
      "grad_norm": 0.9638137817382812,
      "learning_rate": 1.7550440466041488e-05,
      "loss": 5.8052,
      "step": 825
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 2.114203691482544,
      "learning_rate": 1.740835464620631e-05,
      "loss": 5.7936,
      "step": 850
    },
    {
      "epoch": 1.118925831202046,
      "grad_norm": 1.8992066383361816,
      "learning_rate": 1.7266268826371127e-05,
      "loss": 5.7924,
      "step": 875
    },
    {
      "epoch": 1.1508951406649617,
      "grad_norm": 2.8299577236175537,
      "learning_rate": 1.7124183006535948e-05,
      "loss": 5.7806,
      "step": 900
    },
    {
      "epoch": 1.1828644501278773,
      "grad_norm": 1.956953525543213,
      "learning_rate": 1.698209718670077e-05,
      "loss": 5.7835,
      "step": 925
    },
    {
      "epoch": 1.2148337595907928,
      "grad_norm": 2.658413887023926,
      "learning_rate": 1.6840011366865587e-05,
      "loss": 5.7619,
      "step": 950
    },
    {
      "epoch": 1.2468030690537084,
      "grad_norm": 1.2760388851165771,
      "learning_rate": 1.6697925547030408e-05,
      "loss": 5.8038,
      "step": 975
    },
    {
      "epoch": 1.278772378516624,
      "grad_norm": 1.7434897422790527,
      "learning_rate": 1.6555839727195226e-05,
      "loss": 5.779,
      "step": 1000
    },
    {
      "epoch": 1.3107416879795397,
      "grad_norm": 1.3532071113586426,
      "learning_rate": 1.6413753907360047e-05,
      "loss": 5.7904,
      "step": 1025
    },
    {
      "epoch": 1.3427109974424551,
      "grad_norm": 3.7385997772216797,
      "learning_rate": 1.6271668087524864e-05,
      "loss": 5.7696,
      "step": 1050
    },
    {
      "epoch": 1.3746803069053708,
      "grad_norm": 0.9061102867126465,
      "learning_rate": 1.6129582267689685e-05,
      "loss": 5.7919,
      "step": 1075
    },
    {
      "epoch": 1.4066496163682864,
      "grad_norm": 2.7104809284210205,
      "learning_rate": 1.5987496447854503e-05,
      "loss": 5.7785,
      "step": 1100
    },
    {
      "epoch": 1.438618925831202,
      "grad_norm": 1.7147830724716187,
      "learning_rate": 1.5845410628019324e-05,
      "loss": 5.7862,
      "step": 1125
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 2.525214672088623,
      "learning_rate": 1.5703324808184145e-05,
      "loss": 5.7703,
      "step": 1150
    },
    {
      "epoch": 1.5025575447570332,
      "grad_norm": 1.7794997692108154,
      "learning_rate": 1.5561238988348963e-05,
      "loss": 5.773,
      "step": 1175
    },
    {
      "epoch": 1.5345268542199488,
      "grad_norm": 4.901644229888916,
      "learning_rate": 1.5419153168513784e-05,
      "loss": 5.7627,
      "step": 1200
    },
    {
      "epoch": 1.5664961636828645,
      "grad_norm": 3.360812187194824,
      "learning_rate": 1.52770673486786e-05,
      "loss": 5.7596,
      "step": 1225
    },
    {
      "epoch": 1.59846547314578,
      "grad_norm": 1.2768888473510742,
      "learning_rate": 1.5134981528843423e-05,
      "loss": 5.7882,
      "step": 1250
    },
    {
      "epoch": 1.6304347826086958,
      "grad_norm": 2.206226348876953,
      "learning_rate": 1.4992895709008242e-05,
      "loss": 5.7828,
      "step": 1275
    },
    {
      "epoch": 1.6624040920716112,
      "grad_norm": 1.4602406024932861,
      "learning_rate": 1.4850809889173061e-05,
      "loss": 5.771,
      "step": 1300
    },
    {
      "epoch": 1.6943734015345269,
      "grad_norm": 1.1597537994384766,
      "learning_rate": 1.4708724069337881e-05,
      "loss": 5.788,
      "step": 1325
    },
    {
      "epoch": 1.7263427109974425,
      "grad_norm": 3.7494003772735596,
      "learning_rate": 1.45666382495027e-05,
      "loss": 5.7719,
      "step": 1350
    },
    {
      "epoch": 1.758312020460358,
      "grad_norm": 1.6271498203277588,
      "learning_rate": 1.442455242966752e-05,
      "loss": 5.7846,
      "step": 1375
    },
    {
      "epoch": 1.7902813299232738,
      "grad_norm": 2.0469117164611816,
      "learning_rate": 1.4282466609832339e-05,
      "loss": 5.7838,
      "step": 1400
    },
    {
      "epoch": 1.8222506393861893,
      "grad_norm": 2.533921003341675,
      "learning_rate": 1.4140380789997158e-05,
      "loss": 5.7912,
      "step": 1425
    },
    {
      "epoch": 1.854219948849105,
      "grad_norm": 3.291757583618164,
      "learning_rate": 1.3998294970161978e-05,
      "loss": 5.7686,
      "step": 1450
    },
    {
      "epoch": 1.8861892583120206,
      "grad_norm": 3.0181350708007812,
      "learning_rate": 1.3856209150326799e-05,
      "loss": 5.7938,
      "step": 1475
    },
    {
      "epoch": 1.918158567774936,
      "grad_norm": 2.553502321243286,
      "learning_rate": 1.3714123330491618e-05,
      "loss": 5.7847,
      "step": 1500
    },
    {
      "epoch": 1.9501278772378516,
      "grad_norm": 1.8034719228744507,
      "learning_rate": 1.3572037510656438e-05,
      "loss": 5.7952,
      "step": 1525
    },
    {
      "epoch": 1.9820971867007673,
      "grad_norm": 3.7138864994049072,
      "learning_rate": 1.3429951690821257e-05,
      "loss": 5.7528,
      "step": 1550
    },
    {
      "epoch": 2.0,
      "eval_loss": 5.793323516845703,
      "eval_runtime": 18.2796,
      "eval_samples_per_second": 273.528,
      "eval_steps_per_second": 4.322,
      "eval_tr_ling_pearson_cosine": 0.037604255015168134,
      "eval_tr_ling_pearson_dot": 0.0673696846368413,
      "eval_tr_ling_pearson_euclidean": 0.03698411306484619,
      "eval_tr_ling_pearson_manhattan": 0.034740275152181296,
      "eval_tr_ling_pearson_max": 0.0673696846368413,
      "eval_tr_ling_spearman_cosine": 0.04804112988506346,
      "eval_tr_ling_spearman_dot": 0.06818119362900125,
      "eval_tr_ling_spearman_euclidean": 0.03903062430281842,
      "eval_tr_ling_spearman_manhattan": 0.03769766156967754,
      "eval_tr_ling_spearman_max": 0.06818119362900125,
      "step": 1564
    },
    {
      "epoch": 2.0140664961636827,
      "grad_norm": 2.8085248470306396,
      "learning_rate": 1.3287865870986076e-05,
      "loss": 5.65,
      "step": 1575
    },
    {
      "epoch": 2.0460358056265986,
      "grad_norm": 3.3792033195495605,
      "learning_rate": 1.3145780051150896e-05,
      "loss": 5.7537,
      "step": 1600
    },
    {
      "epoch": 2.078005115089514,
      "grad_norm": 3.44346022605896,
      "learning_rate": 1.3003694231315715e-05,
      "loss": 5.7098,
      "step": 1625
    },
    {
      "epoch": 2.10997442455243,
      "grad_norm": 5.481964588165283,
      "learning_rate": 1.2861608411480534e-05,
      "loss": 5.7149,
      "step": 1650
    },
    {
      "epoch": 2.1419437340153453,
      "grad_norm": 2.9816033840179443,
      "learning_rate": 1.2719522591645354e-05,
      "loss": 5.7585,
      "step": 1675
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 3.2157652378082275,
      "learning_rate": 1.2577436771810175e-05,
      "loss": 5.7277,
      "step": 1700
    },
    {
      "epoch": 2.2058823529411766,
      "grad_norm": 2.92006516456604,
      "learning_rate": 1.2435350951974994e-05,
      "loss": 5.7482,
      "step": 1725
    },
    {
      "epoch": 2.237851662404092,
      "grad_norm": 3.7664051055908203,
      "learning_rate": 1.2293265132139814e-05,
      "loss": 5.7115,
      "step": 1750
    },
    {
      "epoch": 2.2698209718670075,
      "grad_norm": 5.3445353507995605,
      "learning_rate": 1.2151179312304633e-05,
      "loss": 5.6895,
      "step": 1775
    },
    {
      "epoch": 2.3017902813299234,
      "grad_norm": 4.100110054016113,
      "learning_rate": 1.2009093492469452e-05,
      "loss": 5.7389,
      "step": 1800
    },
    {
      "epoch": 2.333759590792839,
      "grad_norm": 5.986413478851318,
      "learning_rate": 1.1867007672634272e-05,
      "loss": 5.7161,
      "step": 1825
    },
    {
      "epoch": 2.3657289002557547,
      "grad_norm": 4.717130661010742,
      "learning_rate": 1.1724921852799091e-05,
      "loss": 5.7123,
      "step": 1850
    },
    {
      "epoch": 2.39769820971867,
      "grad_norm": 2.833897352218628,
      "learning_rate": 1.158283603296391e-05,
      "loss": 5.7322,
      "step": 1875
    },
    {
      "epoch": 2.4296675191815855,
      "grad_norm": 3.9461288452148438,
      "learning_rate": 1.144075021312873e-05,
      "loss": 5.7421,
      "step": 1900
    },
    {
      "epoch": 2.4616368286445014,
      "grad_norm": 5.360823154449463,
      "learning_rate": 1.1298664393293551e-05,
      "loss": 5.7615,
      "step": 1925
    },
    {
      "epoch": 2.493606138107417,
      "grad_norm": 3.290187120437622,
      "learning_rate": 1.115657857345837e-05,
      "loss": 5.7493,
      "step": 1950
    },
    {
      "epoch": 2.5255754475703327,
      "grad_norm": 2.8723881244659424,
      "learning_rate": 1.101449275362319e-05,
      "loss": 5.7298,
      "step": 1975
    },
    {
      "epoch": 2.557544757033248,
      "grad_norm": 12.763352394104004,
      "learning_rate": 1.0872406933788009e-05,
      "loss": 5.7529,
      "step": 2000
    },
    {
      "epoch": 2.5895140664961636,
      "grad_norm": 3.423097610473633,
      "learning_rate": 1.0730321113952828e-05,
      "loss": 5.7318,
      "step": 2025
    },
    {
      "epoch": 2.6214833759590794,
      "grad_norm": 3.546499252319336,
      "learning_rate": 1.0588235294117648e-05,
      "loss": 5.7036,
      "step": 2050
    },
    {
      "epoch": 2.653452685421995,
      "grad_norm": 4.731326103210449,
      "learning_rate": 1.0446149474282467e-05,
      "loss": 5.7158,
      "step": 2075
    },
    {
      "epoch": 2.6854219948849103,
      "grad_norm": 5.279483318328857,
      "learning_rate": 1.0304063654447287e-05,
      "loss": 5.7209,
      "step": 2100
    },
    {
      "epoch": 2.717391304347826,
      "grad_norm": 5.814947605133057,
      "learning_rate": 1.0161977834612106e-05,
      "loss": 5.738,
      "step": 2125
    },
    {
      "epoch": 2.7493606138107416,
      "grad_norm": 4.115816116333008,
      "learning_rate": 1.0019892014776927e-05,
      "loss": 5.7337,
      "step": 2150
    },
    {
      "epoch": 2.781329923273657,
      "grad_norm": 4.176394462585449,
      "learning_rate": 9.877806194941746e-06,
      "loss": 5.713,
      "step": 2175
    },
    {
      "epoch": 2.813299232736573,
      "grad_norm": 3.36919903755188,
      "learning_rate": 9.735720375106566e-06,
      "loss": 5.7257,
      "step": 2200
    },
    {
      "epoch": 2.8452685421994883,
      "grad_norm": 4.4527482986450195,
      "learning_rate": 9.593634555271385e-06,
      "loss": 5.6958,
      "step": 2225
    },
    {
      "epoch": 2.877237851662404,
      "grad_norm": 7.66256856918335,
      "learning_rate": 9.451548735436205e-06,
      "loss": 5.7053,
      "step": 2250
    },
    {
      "epoch": 2.9092071611253196,
      "grad_norm": 11.90414810180664,
      "learning_rate": 9.309462915601024e-06,
      "loss": 5.7246,
      "step": 2275
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 3.27648663520813,
      "learning_rate": 9.167377095765843e-06,
      "loss": 5.7291,
      "step": 2300
    },
    {
      "epoch": 2.973145780051151,
      "grad_norm": 5.769582271575928,
      "learning_rate": 9.025291275930663e-06,
      "loss": 5.7139,
      "step": 2325
    },
    {
      "epoch": 3.0,
      "eval_loss": 5.851009845733643,
      "eval_runtime": 18.263,
      "eval_samples_per_second": 273.777,
      "eval_steps_per_second": 4.326,
      "eval_tr_ling_pearson_cosine": 0.06129823646086187,
      "eval_tr_ling_pearson_dot": 0.08667935948713909,
      "eval_tr_ling_pearson_euclidean": 0.050963674624173616,
      "eval_tr_ling_pearson_manhattan": 0.049471366228539336,
      "eval_tr_ling_pearson_max": 0.08667935948713909,
      "eval_tr_ling_spearman_cosine": 0.06262320788887717,
      "eval_tr_ling_spearman_dot": 0.0836754651265069,
      "eval_tr_ling_spearman_euclidean": 0.04874454654419082,
      "eval_tr_ling_spearman_manhattan": 0.04780108900980343,
      "eval_tr_ling_spearman_max": 0.0836754651265069,
      "step": 2346
    },
    {
      "epoch": 3.0051150895140664,
      "grad_norm": 6.343133449554443,
      "learning_rate": 8.883205456095482e-06,
      "loss": 5.5715,
      "step": 2350
    },
    {
      "epoch": 3.0370843989769822,
      "grad_norm": 7.939487457275391,
      "learning_rate": 8.741119636260303e-06,
      "loss": 5.6558,
      "step": 2375
    },
    {
      "epoch": 3.0690537084398977,
      "grad_norm": 3.734879493713379,
      "learning_rate": 8.599033816425122e-06,
      "loss": 5.6441,
      "step": 2400
    },
    {
      "epoch": 3.101023017902813,
      "grad_norm": 6.058401584625244,
      "learning_rate": 8.456947996589942e-06,
      "loss": 5.6569,
      "step": 2425
    },
    {
      "epoch": 3.132992327365729,
      "grad_norm": 4.311662673950195,
      "learning_rate": 8.314862176754761e-06,
      "loss": 5.669,
      "step": 2450
    },
    {
      "epoch": 3.1649616368286444,
      "grad_norm": 8.782428741455078,
      "learning_rate": 8.17277635691958e-06,
      "loss": 5.6361,
      "step": 2475
    },
    {
      "epoch": 3.1969309462915603,
      "grad_norm": 7.427972793579102,
      "learning_rate": 8.0306905370844e-06,
      "loss": 5.6524,
      "step": 2500
    },
    {
      "epoch": 3.2289002557544757,
      "grad_norm": 5.069025993347168,
      "learning_rate": 7.88860471724922e-06,
      "loss": 5.6773,
      "step": 2525
    },
    {
      "epoch": 3.260869565217391,
      "grad_norm": 8.149388313293457,
      "learning_rate": 7.746518897414039e-06,
      "loss": 5.6552,
      "step": 2550
    },
    {
      "epoch": 3.292838874680307,
      "grad_norm": 6.453441619873047,
      "learning_rate": 7.604433077578858e-06,
      "loss": 5.6807,
      "step": 2575
    },
    {
      "epoch": 3.3248081841432224,
      "grad_norm": 6.5807719230651855,
      "learning_rate": 7.4623472577436775e-06,
      "loss": 5.6638,
      "step": 2600
    },
    {
      "epoch": 3.3567774936061383,
      "grad_norm": 10.392335891723633,
      "learning_rate": 7.320261437908497e-06,
      "loss": 5.6582,
      "step": 2625
    },
    {
      "epoch": 3.3887468030690537,
      "grad_norm": 9.251813888549805,
      "learning_rate": 7.178175618073316e-06,
      "loss": 5.658,
      "step": 2650
    },
    {
      "epoch": 3.420716112531969,
      "grad_norm": 5.527411460876465,
      "learning_rate": 7.036089798238136e-06,
      "loss": 5.6626,
      "step": 2675
    },
    {
      "epoch": 3.452685421994885,
      "grad_norm": 5.650461673736572,
      "learning_rate": 6.894003978402956e-06,
      "loss": 5.6802,
      "step": 2700
    },
    {
      "epoch": 3.4846547314578005,
      "grad_norm": 7.156338691711426,
      "learning_rate": 6.751918158567775e-06,
      "loss": 5.6377,
      "step": 2725
    },
    {
      "epoch": 3.516624040920716,
      "grad_norm": 6.843425750732422,
      "learning_rate": 6.6098323387325946e-06,
      "loss": 5.6752,
      "step": 2750
    },
    {
      "epoch": 3.5485933503836318,
      "grad_norm": 14.204697608947754,
      "learning_rate": 6.467746518897414e-06,
      "loss": 5.6573,
      "step": 2775
    },
    {
      "epoch": 3.580562659846547,
      "grad_norm": 3.9053664207458496,
      "learning_rate": 6.325660699062234e-06,
      "loss": 5.6963,
      "step": 2800
    },
    {
      "epoch": 3.612531969309463,
      "grad_norm": 13.336016654968262,
      "learning_rate": 6.1835748792270535e-06,
      "loss": 5.7007,
      "step": 2825
    },
    {
      "epoch": 3.6445012787723785,
      "grad_norm": 5.112432956695557,
      "learning_rate": 6.041489059391873e-06,
      "loss": 5.6746,
      "step": 2850
    },
    {
      "epoch": 3.6764705882352944,
      "grad_norm": 6.077632427215576,
      "learning_rate": 5.899403239556692e-06,
      "loss": 5.6312,
      "step": 2875
    },
    {
      "epoch": 3.70843989769821,
      "grad_norm": 10.304828643798828,
      "learning_rate": 5.757317419721512e-06,
      "loss": 5.5596,
      "step": 2900
    },
    {
      "epoch": 3.7404092071611252,
      "grad_norm": 9.45308780670166,
      "learning_rate": 5.615231599886332e-06,
      "loss": 5.7003,
      "step": 2925
    },
    {
      "epoch": 3.772378516624041,
      "grad_norm": 6.124211311340332,
      "learning_rate": 5.473145780051151e-06,
      "loss": 5.6739,
      "step": 2950
    },
    {
      "epoch": 3.8043478260869565,
      "grad_norm": 8.547770500183105,
      "learning_rate": 5.331059960215971e-06,
      "loss": 5.655,
      "step": 2975
    },
    {
      "epoch": 3.836317135549872,
      "grad_norm": 6.203834533691406,
      "learning_rate": 5.18897414038079e-06,
      "loss": 5.6787,
      "step": 3000
    },
    {
      "epoch": 3.868286445012788,
      "grad_norm": 4.0565643310546875,
      "learning_rate": 5.04688832054561e-06,
      "loss": 5.643,
      "step": 3025
    },
    {
      "epoch": 3.9002557544757033,
      "grad_norm": 9.590073585510254,
      "learning_rate": 4.90480250071043e-06,
      "loss": 5.6412,
      "step": 3050
    },
    {
      "epoch": 3.9322250639386187,
      "grad_norm": 9.556587219238281,
      "learning_rate": 4.762716680875249e-06,
      "loss": 5.758,
      "step": 3075
    },
    {
      "epoch": 3.9641943734015346,
      "grad_norm": 5.743387222290039,
      "learning_rate": 4.620630861040068e-06,
      "loss": 5.6769,
      "step": 3100
    },
    {
      "epoch": 3.99616368286445,
      "grad_norm": 7.73360013961792,
      "learning_rate": 4.478545041204888e-06,
      "loss": 5.7206,
      "step": 3125
    },
    {
      "epoch": 4.0,
      "eval_loss": 5.9124884605407715,
      "eval_runtime": 18.3869,
      "eval_samples_per_second": 271.933,
      "eval_steps_per_second": 4.297,
      "eval_tr_ling_pearson_cosine": 0.058743115070889876,
      "eval_tr_ling_pearson_dot": 0.08477622619519222,
      "eval_tr_ling_pearson_euclidean": 0.04709170917685587,
      "eval_tr_ling_pearson_manhattan": 0.04582145815494953,
      "eval_tr_ling_pearson_max": 0.08477622619519222,
      "eval_tr_ling_spearman_cosine": 0.059526247945378225,
      "eval_tr_ling_spearman_dot": 0.08243745050110735,
      "eval_tr_ling_spearman_euclidean": 0.04407504959649961,
      "eval_tr_ling_spearman_manhattan": 0.04331287037397966,
      "eval_tr_ling_spearman_max": 0.08243745050110735,
      "step": 3128
    }
  ],
  "logging_steps": 25,
  "max_steps": 3910,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}