{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100.0, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 18.88450857996153, "learning_rate": 1.0416666666666666e-08, "logits/generated": -2.8085083961486816, "logits/oppo_generated": -2.8376712799072266, "logits/oppo_real": -2.8085083961486816, "logits/real": -2.8376712799072266, "logps/generated": -72.26029968261719, "logps/oppo_gen": -72.26029968261719, "logps/oppo_real": -321.1210021972656, "logps/real": -321.1210021972656, "loss": 1.9028, "loss/gen": 1.7014132738113403, "loss/real": 0.20141328871250153, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.0, "grad_norm": 20.256159846297372, "learning_rate": 2.083333333333333e-08, "logits/generated": -2.558225154876709, "logits/oppo_generated": -2.680725574493408, "logits/oppo_real": -2.558225154876709, "logits/real": -2.680725574493408, "logps/generated": -77.56204223632812, "logps/oppo_gen": -77.56204223632812, "logps/oppo_real": -309.978271484375, "logps/real": -309.978271484375, "loss": 1.9028, "loss/gen": 1.7014132738113403, "loss/real": 0.20141328871250153, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 2 }, { "epoch": 0.01, "grad_norm": 20.0893982821747, "learning_rate": 3.125e-08, "logits/generated": -2.7066292762756348, "logits/oppo_generated": -2.7376646995544434, "logits/oppo_real": -2.707059860229492, "logits/real": -2.7373762130737305, "logps/generated": -96.45381164550781, "logps/oppo_gen": -96.59260559082031, "logps/oppo_real": -345.4535827636719, "logps/real": -345.49041748046875, "loss": 1.903, "loss/gen": 1.7025485038757324, "loss/real": 0.20148169994354248, "rewards/accuracies": 0.3125, "rewards/generated": 0.13880300521850586, "rewards/margins": -0.17563974857330322, "rewards/real": -0.03683674335479736, "step": 3 }, { "epoch": 0.01, "grad_norm": 18.968951103237924, "learning_rate": 4.166666666666666e-08, "logits/generated": -2.8126296997070312, "logits/oppo_generated": -2.7261557579040527, "logits/oppo_real": -2.8125224113464355, "logits/real": -2.7257490158081055, "logps/generated": -69.03158569335938, "logps/oppo_gen": -69.00636291503906, "logps/oppo_real": -227.8314208984375, "logps/real": -227.7684326171875, "loss": 1.9026, "loss/gen": 1.7012073993682861, "loss/real": 0.20130020380020142, "rewards/accuracies": 0.6875, "rewards/generated": -0.02523219585418701, "rewards/margins": 0.08823049068450928, "rewards/real": 0.06299829483032227, "step": 4 }, { "epoch": 0.01, "grad_norm": 20.702301723017644, "learning_rate": 5.208333333333333e-08, "logits/generated": -2.839081287384033, "logits/oppo_generated": -2.9415202140808105, "logits/oppo_real": -2.8390822410583496, "logits/real": -2.9421164989471436, "logps/generated": -73.87603759765625, "logps/oppo_gen": -73.82575988769531, "logps/oppo_real": -342.5164794921875, "logps/real": -342.60211181640625, "loss": 1.9028, "loss/gen": 1.7010023593902588, "loss/real": 0.20157013833522797, "rewards/accuracies": 0.5, "rewards/generated": -0.050284504890441895, "rewards/margins": -0.035373032093048096, "rewards/real": -0.08565753698348999, "step": 5 }, { "epoch": 0.01, "grad_norm": 18.434664440323935, "learning_rate": 6.25e-08, "logits/generated": -2.6757049560546875, "logits/oppo_generated": -2.6438450813293457, "logits/oppo_real": -2.675816059112549, "logits/real": -2.6441245079040527, "logps/generated": -86.78526306152344, "logps/oppo_gen": -86.71319580078125, "logps/oppo_real": -326.2207946777344, "logps/real": -326.25872802734375, "loss": 1.9024, "loss/gen": 1.70082426071167, "loss/real": 0.20148399472236633, "rewards/accuracies": 0.625, "rewards/generated": -0.07205724716186523, "rewards/margins": 0.03410625457763672, "rewards/real": -0.037950992584228516, "step": 6 }, { "epoch": 0.01, "grad_norm": 19.57938365741621, "learning_rate": 7.291666666666667e-08, "logits/generated": -2.7598161697387695, "logits/oppo_generated": -2.8367371559143066, "logits/oppo_real": -2.759450912475586, "logits/real": -2.837430953979492, "logps/generated": -71.20977783203125, "logps/oppo_gen": -71.09225463867188, "logps/oppo_real": -338.93975830078125, "logps/real": -339.0378112792969, "loss": 1.9023, "loss/gen": 1.7004528045654297, "loss/real": 0.20159286260604858, "rewards/accuracies": 0.5625, "rewards/generated": -0.117523193359375, "rewards/margins": 0.01949334144592285, "rewards/real": -0.09802985191345215, "step": 7 }, { "epoch": 0.02, "grad_norm": 18.098258660786524, "learning_rate": 8.333333333333333e-08, "logits/generated": -2.9020438194274902, "logits/oppo_generated": -2.737520694732666, "logits/oppo_real": -2.9021873474121094, "logits/real": -2.738926410675049, "logps/generated": -57.77384948730469, "logps/oppo_gen": -57.62103271484375, "logps/oppo_real": -273.72210693359375, "logps/real": -273.86004638671875, "loss": 1.9019, "loss/gen": 1.7001641988754272, "loss/real": 0.20166659355163574, "rewards/accuracies": 0.6875, "rewards/generated": -0.1528165340423584, "rewards/margins": 0.014842987060546875, "rewards/real": -0.13797354698181152, "step": 8 }, { "epoch": 0.02, "grad_norm": 18.096628859382037, "learning_rate": 9.375e-08, "logits/generated": -2.5104784965515137, "logits/oppo_generated": -2.585773468017578, "logits/oppo_real": -2.5103254318237305, "logits/real": -2.5853567123413086, "logps/generated": -56.835174560546875, "logps/oppo_gen": -56.53358459472656, "logps/oppo_real": -179.40626525878906, "logps/real": -179.69265747070312, "loss": 1.9006, "loss/gen": 1.6989485025405884, "loss/real": 0.2019369751214981, "rewards/accuracies": 0.5, "rewards/generated": -0.3015878200531006, "rewards/margins": 0.015174269676208496, "rewards/real": -0.2864135503768921, "step": 9 }, { "epoch": 0.02, "grad_norm": 19.683420491274582, "learning_rate": 1.0416666666666667e-07, "logits/generated": -2.6931896209716797, "logits/oppo_generated": -2.686525344848633, "logits/oppo_real": -2.6955156326293945, "logits/real": -2.6843934059143066, "logps/generated": -62.24772262573242, "logps/oppo_gen": -61.876277923583984, "logps/oppo_real": -247.48609924316406, "logps/real": -247.6177520751953, "loss": 1.8999, "loss/gen": 1.698378086090088, "loss/real": 0.20165444910526276, "rewards/accuracies": 0.75, "rewards/generated": -0.3714407682418823, "rewards/margins": 0.23977923393249512, "rewards/real": -0.1316615343093872, "step": 10 }, { "epoch": 0.02, "grad_norm": 17.437652259079883, "learning_rate": 1.1458333333333332e-07, "logits/generated": -2.916860818862915, "logits/oppo_generated": -2.79710054397583, "logits/oppo_real": -2.9184556007385254, "logits/real": -2.795858383178711, "logps/generated": -77.727783203125, "logps/oppo_gen": -77.069091796875, "logps/oppo_real": -262.61822509765625, "logps/real": -262.8753662109375, "loss": 1.8994, "loss/gen": 1.6960327625274658, "loss/real": 0.20188409090042114, "rewards/accuracies": 0.6875, "rewards/generated": -0.6587002277374268, "rewards/margins": 0.40155017375946045, "rewards/real": -0.2571500539779663, "step": 11 }, { "epoch": 0.03, "grad_norm": 18.252566640471244, "learning_rate": 1.25e-07, "logits/generated": -2.99216890335083, "logits/oppo_generated": -2.744597911834717, "logits/oppo_real": -2.9952921867370605, "logits/real": -2.742313861846924, "logps/generated": -61.30070114135742, "logps/oppo_gen": -60.12853240966797, "logps/oppo_real": -190.38400268554688, "logps/real": -191.3928680419922, "loss": 1.8945, "loss/gen": 1.6918413639068604, "loss/real": 0.20326292514801025, "rewards/accuracies": 0.5, "rewards/generated": -1.1721669435501099, "rewards/margins": 0.16329419612884521, "rewards/real": -1.0088727474212646, "step": 12 }, { "epoch": 0.03, "grad_norm": 18.974558611775894, "learning_rate": 1.3541666666666666e-07, "logits/generated": -2.887937545776367, "logits/oppo_generated": -2.9385900497436523, "logits/oppo_real": -2.8927111625671387, "logits/real": -2.933300256729126, "logps/generated": -86.03516387939453, "logps/oppo_gen": -84.29090118408203, "logps/oppo_real": -449.3851318359375, "logps/real": -450.25927734375, "loss": 1.8918, "loss/gen": 1.687178134918213, "loss/real": 0.2030172348022461, "rewards/accuracies": 0.875, "rewards/generated": -1.744260549545288, "rewards/margins": 0.8700805902481079, "rewards/real": -0.8741799592971802, "step": 13 }, { "epoch": 0.03, "grad_norm": 18.025631774750817, "learning_rate": 1.4583333333333335e-07, "logits/generated": -2.9185471534729004, "logits/oppo_generated": -2.829657793045044, "logits/oppo_real": -2.9221749305725098, "logits/real": -2.8240890502929688, "logps/generated": -90.33718872070312, "logps/oppo_gen": -88.6211166381836, "logps/oppo_real": -346.31817626953125, "logps/real": -347.5406494140625, "loss": 1.8906, "loss/gen": 1.6874079704284668, "loss/real": 0.20365647971630096, "rewards/accuracies": 0.9375, "rewards/generated": -1.716069221496582, "rewards/margins": 0.4936128854751587, "rewards/real": -1.2224563360214233, "step": 14 }, { "epoch": 0.03, "grad_norm": 18.862430656816294, "learning_rate": 1.5624999999999999e-07, "logits/generated": -2.743849754333496, "logits/oppo_generated": -2.6369616985321045, "logits/oppo_real": -2.745821475982666, "logits/real": -2.6341629028320312, "logps/generated": -60.22393798828125, "logps/oppo_gen": -58.13185501098633, "logps/oppo_real": -183.13169860839844, "logps/real": -184.82443237304688, "loss": 1.8892, "loss/gen": 1.6843458414077759, "loss/real": 0.20452776551246643, "rewards/accuracies": 0.6875, "rewards/generated": -2.09208607673645, "rewards/margins": 0.39935922622680664, "rewards/real": -1.6927268505096436, "step": 15 }, { "epoch": 0.03, "grad_norm": 18.85043818523566, "learning_rate": 1.6666666666666665e-07, "logits/generated": -2.775763750076294, "logits/oppo_generated": -2.8875584602355957, "logits/oppo_real": -2.7887091636657715, "logits/real": -2.872068405151367, "logps/generated": -93.86468505859375, "logps/oppo_gen": -89.01815795898438, "logps/oppo_real": -355.2412414550781, "logps/real": -358.21697998046875, "loss": 1.8728, "loss/gen": 1.661976933479309, "loss/real": 0.2069297432899475, "rewards/accuracies": 0.9375, "rewards/generated": -4.846524238586426, "rewards/margins": 1.8708148002624512, "rewards/real": -2.9757091999053955, "step": 16 }, { "epoch": 0.04, "grad_norm": 18.85426576142237, "learning_rate": 1.7708333333333334e-07, "logits/generated": -2.926854133605957, "logits/oppo_generated": -2.791188955307007, "logits/oppo_real": -2.941716194152832, "logits/real": -2.772773265838623, "logps/generated": -83.66239929199219, "logps/oppo_gen": -78.4744873046875, "logps/oppo_real": -372.87506103515625, "logps/real": -375.8855285644531, "loss": 1.867, "loss/gen": 1.659225583076477, "loss/real": 0.2069997638463974, "rewards/accuracies": 1.0, "rewards/generated": -5.187913417816162, "rewards/margins": 2.1774630546569824, "rewards/real": -3.0104501247406006, "step": 17 }, { "epoch": 0.04, "grad_norm": 18.277145472716995, "learning_rate": 1.875e-07, "logits/generated": -2.619422197341919, "logits/oppo_generated": -2.6162495613098145, "logits/oppo_real": -2.63336443901062, "logits/real": -2.597346782684326, "logps/generated": -91.84591674804688, "logps/oppo_gen": -85.63116455078125, "logps/oppo_real": -327.649169921875, "logps/real": -331.60296630859375, "loss": 1.861, "loss/gen": 1.6509480476379395, "loss/real": 0.20878931879997253, "rewards/accuracies": 0.8125, "rewards/generated": -6.214766502380371, "rewards/margins": 2.260969638824463, "rewards/real": -3.953796863555908, "step": 18 }, { "epoch": 0.04, "grad_norm": 17.27219102382995, "learning_rate": 1.9791666666666664e-07, "logits/generated": -2.609903335571289, "logits/oppo_generated": -2.712047576904297, "logits/oppo_real": -2.627098321914673, "logits/real": -2.693801164627075, "logps/generated": -86.11844635009766, "logps/oppo_gen": -79.7235107421875, "logps/oppo_real": -208.48953247070312, "logps/real": -212.92434692382812, "loss": 1.8585, "loss/gen": 1.64948308467865, "loss/real": 0.20968025922775269, "rewards/accuracies": 1.0, "rewards/generated": -6.394937515258789, "rewards/margins": 1.9601283073425293, "rewards/real": -4.434809684753418, "step": 19 }, { "epoch": 0.04, "grad_norm": 18.456748096373012, "learning_rate": 2.0833333333333333e-07, "logits/generated": -2.788766384124756, "logits/oppo_generated": -2.751258373260498, "logits/oppo_real": -2.8082375526428223, "logits/real": -2.7272610664367676, "logps/generated": -74.62376403808594, "logps/oppo_gen": -67.47988891601562, "logps/oppo_real": -222.2954864501953, "logps/real": -227.22134399414062, "loss": 1.8499, "loss/gen": 1.6434563398361206, "loss/real": 0.2105972170829773, "rewards/accuracies": 0.9375, "rewards/generated": -7.143871307373047, "rewards/margins": 2.2180023193359375, "rewards/real": -4.925868988037109, "step": 20 }, { "epoch": 0.04, "grad_norm": 18.999819302954485, "learning_rate": 2.1875e-07, "logits/generated": -2.950169086456299, "logits/oppo_generated": -2.8267569541931152, "logits/oppo_real": -2.9816665649414062, "logits/real": -2.7851290702819824, "logps/generated": -86.47335815429688, "logps/oppo_gen": -74.723388671875, "logps/oppo_real": -304.63775634765625, "logps/real": -312.2291259765625, "loss": 1.8274, "loss/gen": 1.6065895557403564, "loss/real": 0.2158581167459488, "rewards/accuracies": 0.9375, "rewards/generated": -11.74997615814209, "rewards/margins": 4.158588409423828, "rewards/real": -7.59138822555542, "step": 21 }, { "epoch": 0.05, "grad_norm": 17.589315014677144, "learning_rate": 2.2916666666666663e-07, "logits/generated": -2.783734083175659, "logits/oppo_generated": -2.7697672843933105, "logits/oppo_real": -2.8284473419189453, "logits/real": -2.7101640701293945, "logps/generated": -88.10362243652344, "logps/oppo_gen": -71.9073715209961, "logps/oppo_real": -272.13043212890625, "logps/real": -283.3026123046875, "loss": 1.8022, "loss/gen": 1.5712285041809082, "loss/real": 0.22294577956199646, "rewards/accuracies": 0.9375, "rewards/generated": -16.19625473022461, "rewards/margins": 5.024064540863037, "rewards/real": -11.172189712524414, "step": 22 }, { "epoch": 0.05, "grad_norm": 19.18362119156559, "learning_rate": 2.3958333333333335e-07, "logits/generated": -3.011516571044922, "logits/oppo_generated": -2.745267391204834, "logits/oppo_real": -3.053928852081299, "logits/real": -2.679607391357422, "logps/generated": -95.36741638183594, "logps/oppo_gen": -75.51863098144531, "logps/oppo_real": -319.1461486816406, "logps/real": -332.5950927734375, "loss": 1.7799, "loss/gen": 1.5424509048461914, "loss/real": 0.22752627730369568, "rewards/accuracies": 1.0, "rewards/generated": -19.848791122436523, "rewards/margins": 6.399872303009033, "rewards/real": -13.448917388916016, "step": 23 }, { "epoch": 0.05, "grad_norm": 17.90351695029368, "learning_rate": 2.5e-07, "logits/generated": -2.585099697113037, "logits/oppo_generated": -2.7807955741882324, "logits/oppo_real": -2.645164966583252, "logits/real": -2.711965799331665, "logps/generated": -89.25286102294922, "logps/oppo_gen": -71.00718688964844, "logps/oppo_real": -250.61138916015625, "logps/real": -262.8109130859375, "loss": 1.7768, "loss/gen": 1.554997444152832, "loss/real": 0.22490805387496948, "rewards/accuracies": 1.0, "rewards/generated": -18.245668411254883, "rewards/margins": 6.046163558959961, "rewards/real": -12.199504852294922, "step": 24 }, { "epoch": 0.05, "grad_norm": 18.1587231735858, "learning_rate": 2.604166666666667e-07, "logits/generated": -2.7362494468688965, "logits/oppo_generated": -2.7489709854125977, "logits/oppo_real": -2.7923738956451416, "logits/real": -2.6788840293884277, "logps/generated": -75.5267333984375, "logps/oppo_gen": -55.94059371948242, "logps/oppo_real": -216.20614624023438, "logps/real": -232.888916015625, "loss": 1.7646, "loss/gen": 1.544620156288147, "loss/real": 0.23424160480499268, "rewards/accuracies": 0.8125, "rewards/generated": -19.58614158630371, "rewards/margins": 2.9033803939819336, "rewards/real": -16.68276023864746, "step": 25 }, { "epoch": 0.05, "grad_norm": 19.878306371745666, "learning_rate": 2.708333333333333e-07, "logits/generated": -2.8480935096740723, "logits/oppo_generated": -2.759331703186035, "logits/oppo_real": -2.911282539367676, "logits/real": -2.6749706268310547, "logps/generated": -110.72230529785156, "logps/oppo_gen": -87.22213745117188, "logps/oppo_real": -327.4888000488281, "logps/real": -345.6739196777344, "loss": 1.7452, "loss/gen": 1.5140717029571533, "loss/real": 0.2376970797777176, "rewards/accuracies": 0.875, "rewards/generated": -23.50015640258789, "rewards/margins": 5.315024375915527, "rewards/real": -18.18513298034668, "step": 26 }, { "epoch": 0.06, "grad_norm": 20.7959915993819, "learning_rate": 2.8125e-07, "logits/generated": -2.706425666809082, "logits/oppo_generated": -2.980116605758667, "logits/oppo_real": -2.7865042686462402, "logits/real": -2.8899600505828857, "logps/generated": -115.33242797851562, "logps/oppo_gen": -87.22333526611328, "logps/oppo_real": -424.35565185546875, "logps/real": -440.6988525390625, "loss": 1.7307, "loss/gen": 1.4784982204437256, "loss/real": 0.23403891921043396, "rewards/accuracies": 1.0, "rewards/generated": -28.109092712402344, "rewards/margins": 11.765887260437012, "rewards/real": -16.343204498291016, "step": 27 }, { "epoch": 0.06, "grad_norm": 22.86794284710791, "learning_rate": 2.916666666666667e-07, "logits/generated": -2.640286922454834, "logits/oppo_generated": -2.8235785961151123, "logits/oppo_real": -2.736990451812744, "logits/real": -2.7280538082122803, "logps/generated": -107.66032409667969, "logps/oppo_gen": -73.19400024414062, "logps/oppo_real": -275.0092468261719, "logps/real": -299.7300720214844, "loss": 1.6961, "loss/gen": 1.429884910583496, "loss/real": 0.2523398995399475, "rewards/accuracies": 0.875, "rewards/generated": -34.46632385253906, "rewards/margins": 9.745501518249512, "rewards/real": -24.720821380615234, "step": 28 }, { "epoch": 0.06, "grad_norm": 22.34167248864426, "learning_rate": 3.020833333333333e-07, "logits/generated": -2.7891035079956055, "logits/oppo_generated": -2.7930147647857666, "logits/oppo_real": -2.9117727279663086, "logits/real": -2.663120746612549, "logps/generated": -125.35220336914062, "logps/oppo_gen": -80.30975341796875, "logps/oppo_real": -230.66831970214844, "logps/real": -268.874267578125, "loss": 1.6126, "loss/gen": 1.3512537479400635, "loss/real": 0.28433263301849365, "rewards/accuracies": 0.9375, "rewards/generated": -45.04244613647461, "rewards/margins": 6.836463928222656, "rewards/real": -38.20598220825195, "step": 29 }, { "epoch": 0.06, "grad_norm": 19.833808871854, "learning_rate": 3.1249999999999997e-07, "logits/generated": -2.690408229827881, "logits/oppo_generated": -2.6976568698883057, "logits/oppo_real": -2.8616366386413574, "logits/real": -2.543367862701416, "logps/generated": -142.45785522460938, "logps/oppo_gen": -77.94517517089844, "logps/oppo_real": -253.06488037109375, "logps/real": -304.15667724609375, "loss": 1.5531, "loss/gen": 1.2099614143371582, "loss/real": 0.317098468542099, "rewards/accuracies": 0.9375, "rewards/generated": -64.51268005371094, "rewards/margins": 13.420904159545898, "rewards/real": -51.09177780151367, "step": 30 }, { "epoch": 0.06, "grad_norm": 19.015498214963532, "learning_rate": 3.2291666666666666e-07, "logits/generated": -2.32857084274292, "logits/oppo_generated": -2.3693275451660156, "logits/oppo_real": -2.4662370681762695, "logits/real": -2.223062038421631, "logps/generated": -140.2643585205078, "logps/oppo_gen": -67.86835479736328, "logps/oppo_real": -219.30712890625, "logps/real": -277.8334655761719, "loss": 1.487, "loss/gen": 1.1561675071716309, "loss/real": 0.33837342262268066, "rewards/accuracies": 0.875, "rewards/generated": -72.39601135253906, "rewards/margins": 13.869674682617188, "rewards/real": -58.526336669921875, "step": 31 }, { "epoch": 0.07, "grad_norm": 17.094015115917124, "learning_rate": 3.333333333333333e-07, "logits/generated": -2.578625202178955, "logits/oppo_generated": -2.723201274871826, "logits/oppo_real": -2.7767179012298584, "logits/real": -2.5331850051879883, "logps/generated": -151.8267364501953, "logps/oppo_gen": -71.2327880859375, "logps/oppo_real": -306.9924011230469, "logps/real": -375.8905029296875, "loss": 1.4524, "loss/gen": 1.1011745929718018, "loss/real": 0.368760347366333, "rewards/accuracies": 0.875, "rewards/generated": -80.59394073486328, "rewards/margins": 11.695858001708984, "rewards/real": -68.89808654785156, "step": 32 }, { "epoch": 0.07, "grad_norm": 15.386577434137806, "learning_rate": 3.4375e-07, "logits/generated": -2.610579013824463, "logits/oppo_generated": -2.8122611045837402, "logits/oppo_real": -2.822584629058838, "logits/real": -2.614462375640869, "logps/generated": -169.17803955078125, "logps/oppo_gen": -74.99469757080078, "logps/oppo_real": -295.076904296875, "logps/real": -373.369384765625, "loss": 1.4379, "loss/gen": 1.0121688842773438, "loss/real": 0.3995278775691986, "rewards/accuracies": 0.875, "rewards/generated": -94.18334197998047, "rewards/margins": 15.890843391418457, "rewards/real": -78.2925033569336, "step": 33 }, { "epoch": 0.07, "grad_norm": 15.728951573532772, "learning_rate": 3.541666666666667e-07, "logits/generated": -2.2474634647369385, "logits/oppo_generated": -2.425895929336548, "logits/oppo_real": -2.4456372261047363, "logits/real": -2.2315173149108887, "logps/generated": -156.623291015625, "logps/oppo_gen": -62.23015594482422, "logps/oppo_real": -248.11083984375, "logps/real": -324.737548828125, "loss": 1.3863, "loss/gen": 1.011872410774231, "loss/real": 0.39403319358825684, "rewards/accuracies": 0.9375, "rewards/generated": -94.39312744140625, "rewards/margins": 17.766422271728516, "rewards/real": -76.626708984375, "step": 34 }, { "epoch": 0.07, "grad_norm": 16.02933768900159, "learning_rate": 3.645833333333333e-07, "logits/generated": -2.539496421813965, "logits/oppo_generated": -2.8763086795806885, "logits/oppo_real": -2.7546887397766113, "logits/real": -2.63142728805542, "logps/generated": -212.8411865234375, "logps/oppo_gen": -98.69316101074219, "logps/oppo_real": -410.1753234863281, "logps/real": -498.81109619140625, "loss": 1.3634, "loss/gen": 0.8914888501167297, "loss/real": 0.43470653891563416, "rewards/accuracies": 1.0, "rewards/generated": -114.14802551269531, "rewards/margins": 25.512226104736328, "rewards/real": -88.63580322265625, "step": 35 }, { "epoch": 0.08, "grad_norm": 15.791104886304613, "learning_rate": 3.75e-07, "logits/generated": -2.6440393924713135, "logits/oppo_generated": -2.7524499893188477, "logits/oppo_real": -2.870028018951416, "logits/real": -2.501885175704956, "logps/generated": -183.30636596679688, "logps/oppo_gen": -62.94434356689453, "logps/oppo_real": -279.46759033203125, "logps/real": -381.0328369140625, "loss": 1.3279, "loss/gen": 0.8554569482803345, "loss/real": 0.4819982051849365, "rewards/accuracies": 0.875, "rewards/generated": -120.36199951171875, "rewards/margins": 18.79672622680664, "rewards/real": -101.56527709960938, "step": 36 }, { "epoch": 0.08, "grad_norm": 12.472073296155996, "learning_rate": 3.8541666666666665e-07, "logits/generated": -2.3085546493530273, "logits/oppo_generated": -2.651912212371826, "logits/oppo_real": -2.5685677528381348, "logits/real": -2.394282817840576, "logps/generated": -216.96817016601562, "logps/oppo_gen": -81.71993255615234, "logps/oppo_real": -309.4556884765625, "logps/real": -421.0320739746094, "loss": 1.2951, "loss/gen": 0.7755213975906372, "loss/real": 0.5243090391159058, "rewards/accuracies": 0.9375, "rewards/generated": -135.24826049804688, "rewards/margins": 23.671871185302734, "rewards/real": -111.57638549804688, "step": 37 }, { "epoch": 0.08, "grad_norm": 10.256856346455107, "learning_rate": 3.958333333333333e-07, "logits/generated": -2.6325173377990723, "logits/oppo_generated": -2.9185829162597656, "logits/oppo_real": -2.9221343994140625, "logits/real": -2.6381330490112305, "logps/generated": -244.53273010253906, "logps/oppo_gen": -81.54811096191406, "logps/oppo_real": -367.9358215332031, "logps/real": -507.13909912109375, "loss": 1.2766, "loss/gen": 0.633612871170044, "loss/real": 0.6428331136703491, "rewards/accuracies": 0.75, "rewards/generated": -162.984619140625, "rewards/margins": 23.781320571899414, "rewards/real": -139.20330810546875, "step": 38 }, { "epoch": 0.08, "grad_norm": 9.021470209652767, "learning_rate": 4.0625e-07, "logits/generated": -2.6545820236206055, "logits/oppo_generated": -2.6453075408935547, "logits/oppo_real": -2.9239563941955566, "logits/real": -2.370694637298584, "logps/generated": -236.71627807617188, "logps/oppo_gen": -69.3997802734375, "logps/oppo_real": -195.3793487548828, "logps/real": -335.74542236328125, "loss": 1.2783, "loss/gen": 0.6135517358779907, "loss/real": 0.6481477618217468, "rewards/accuracies": 1.0, "rewards/generated": -167.3164825439453, "rewards/margins": 26.950382232666016, "rewards/real": -140.3660888671875, "step": 39 }, { "epoch": 0.08, "grad_norm": 9.477091020530814, "learning_rate": 4.1666666666666667e-07, "logits/generated": -2.1519742012023926, "logits/oppo_generated": -2.5346462726593018, "logits/oppo_real": -2.469729423522949, "logits/real": -2.239739418029785, "logps/generated": -285.01654052734375, "logps/oppo_gen": -107.60747528076172, "logps/oppo_real": -305.470703125, "logps/real": -444.7705078125, "loss": 1.2708, "loss/gen": 0.5709100365638733, "loss/real": 0.6454157829284668, "rewards/accuracies": 1.0, "rewards/generated": -177.4090576171875, "rewards/margins": 38.10923767089844, "rewards/real": -139.29981994628906, "step": 40 }, { "epoch": 0.09, "grad_norm": 9.641810018209553, "learning_rate": 4.270833333333333e-07, "logits/generated": -2.347841739654541, "logits/oppo_generated": -2.7028918266296387, "logits/oppo_real": -2.7112483978271484, "logits/real": -2.375851631164551, "logps/generated": -244.73318481445312, "logps/oppo_gen": -65.4095687866211, "logps/oppo_real": -249.814208984375, "logps/real": -396.6846923828125, "loss": 1.2691, "loss/gen": 0.562430739402771, "loss/real": 0.6824460029602051, "rewards/accuracies": 0.9375, "rewards/generated": -179.3236083984375, "rewards/margins": 32.453147888183594, "rewards/real": -146.87045288085938, "step": 41 }, { "epoch": 0.09, "grad_norm": 10.853774128928466, "learning_rate": 4.375e-07, "logits/generated": -2.490652084350586, "logits/oppo_generated": -2.747931957244873, "logits/oppo_real": -2.890165328979492, "logits/real": -2.3857312202453613, "logps/generated": -266.1440734863281, "logps/oppo_gen": -75.59771728515625, "logps/oppo_real": -321.30108642578125, "logps/real": -481.9388122558594, "loss": 1.2527, "loss/gen": 0.516680121421814, "loss/real": 0.7500206828117371, "rewards/accuracies": 0.875, "rewards/generated": -190.54635620117188, "rewards/margins": 29.908649444580078, "rewards/real": -160.63771057128906, "step": 42 }, { "epoch": 0.09, "grad_norm": 11.695829799222576, "learning_rate": 4.479166666666667e-07, "logits/generated": -2.4598608016967773, "logits/oppo_generated": -2.7500972747802734, "logits/oppo_real": -2.8781628608703613, "logits/real": -2.4157962799072266, "logps/generated": -250.79949951171875, "logps/oppo_gen": -61.141502380371094, "logps/oppo_real": -208.20816040039062, "logps/real": -371.35186767578125, "loss": 1.2732, "loss/gen": 0.5206349492073059, "loss/real": 0.7657498121261597, "rewards/accuracies": 0.8125, "rewards/generated": -189.65798950195312, "rewards/margins": 26.514284133911133, "rewards/real": -163.1437225341797, "step": 43 }, { "epoch": 0.09, "grad_norm": 12.721230754689678, "learning_rate": 4.5833333333333327e-07, "logits/generated": -2.316429615020752, "logits/oppo_generated": -2.86560320854187, "logits/oppo_real": -2.7520575523376465, "logits/real": -2.474339008331299, "logps/generated": -267.144287109375, "logps/oppo_gen": -70.71253204345703, "logps/oppo_real": -288.21905517578125, "logps/real": -453.522216796875, "loss": 1.2493, "loss/gen": 0.4936552047729492, "loss/real": 0.7759716510772705, "rewards/accuracies": 0.9375, "rewards/generated": -196.4317626953125, "rewards/margins": 31.12860107421875, "rewards/real": -165.30316162109375, "step": 44 }, { "epoch": 0.09, "grad_norm": 13.630179724288533, "learning_rate": 4.6874999999999996e-07, "logits/generated": -2.31591796875, "logits/oppo_generated": -2.6641106605529785, "logits/oppo_real": -2.687716484069824, "logits/real": -2.3076515197753906, "logps/generated": -257.56488037109375, "logps/oppo_gen": -69.85922241210938, "logps/oppo_real": -253.47152709960938, "logps/real": -407.678466796875, "loss": 1.2267, "loss/gen": 0.5265874862670898, "loss/real": 0.7205492258071899, "rewards/accuracies": 0.9375, "rewards/generated": -187.70565795898438, "rewards/margins": 33.498741149902344, "rewards/real": -154.20692443847656, "step": 45 }, { "epoch": 0.1, "grad_norm": 9.876900019322825, "learning_rate": 4.791666666666667e-07, "logits/generated": -2.3460116386413574, "logits/oppo_generated": -2.6993772983551025, "logits/oppo_real": -2.7424378395080566, "logits/real": -2.3106417655944824, "logps/generated": -253.16329956054688, "logps/oppo_gen": -71.79619598388672, "logps/oppo_real": -243.79006958007812, "logps/real": -369.48553466796875, "loss": 1.1683, "loss/gen": 0.5725552439689636, "loss/real": 0.5813912749290466, "rewards/accuracies": 0.875, "rewards/generated": -181.36709594726562, "rewards/margins": 55.67161178588867, "rewards/real": -125.69548034667969, "step": 46 }, { "epoch": 0.1, "grad_norm": 12.87668280113079, "learning_rate": 4.895833333333333e-07, "logits/generated": -2.4254989624023438, "logits/oppo_generated": -2.7546112537384033, "logits/oppo_real": -2.8327903747558594, "logits/real": -2.399583339691162, "logps/generated": -216.5120391845703, "logps/oppo_gen": -68.76174926757812, "logps/oppo_real": -289.6665954589844, "logps/real": -394.17529296875, "loss": 1.2136, "loss/gen": 0.7142927646636963, "loss/real": 0.4956602156162262, "rewards/accuracies": 1.0, "rewards/generated": -147.7502899169922, "rewards/margins": 43.241600036621094, "rewards/real": -104.50868225097656, "step": 47 }, { "epoch": 0.1, "grad_norm": 12.230479978436238, "learning_rate": 5e-07, "logits/generated": -2.3337907791137695, "logits/oppo_generated": -2.5967700481414795, "logits/oppo_real": -2.6954846382141113, "logits/real": -2.262849807739258, "logps/generated": -232.96676635742188, "logps/oppo_gen": -75.533935546875, "logps/oppo_real": -281.36871337890625, "logps/real": -386.46337890625, "loss": 1.183, "loss/gen": 0.6721818447113037, "loss/real": 0.49635791778564453, "rewards/accuracies": 0.9375, "rewards/generated": -157.43283081054688, "rewards/margins": 52.33815002441406, "rewards/real": -105.09467315673828, "step": 48 }, { "epoch": 0.1, "grad_norm": 10.696963754698617, "learning_rate": 4.999933277714308e-07, "logits/generated": -2.2673449516296387, "logits/oppo_generated": -2.512704849243164, "logits/oppo_real": -2.652205467224121, "logits/real": -2.222139596939087, "logps/generated": -239.5457763671875, "logps/oppo_gen": -80.6132583618164, "logps/oppo_real": -411.4334411621094, "logps/real": -531.0726318359375, "loss": 1.1722, "loss/gen": 0.6539755463600159, "loss/real": 0.5593386888504028, "rewards/accuracies": 0.9375, "rewards/generated": -158.93252563476562, "rewards/margins": 39.29335021972656, "rewards/real": -119.63916778564453, "step": 49 }, { "epoch": 0.1, "grad_norm": 12.920776172861276, "learning_rate": 4.999733114418725e-07, "logits/generated": -2.5660576820373535, "logits/oppo_generated": -2.7457919120788574, "logits/oppo_real": -2.8892219066619873, "logits/real": -2.426638126373291, "logps/generated": -282.44354248046875, "logps/oppo_gen": -84.30994415283203, "logps/oppo_real": -332.35064697265625, "logps/real": -460.3952331542969, "loss": 1.151, "loss/gen": 0.4987773597240448, "loss/real": 0.6023204326629639, "rewards/accuracies": 0.9375, "rewards/generated": -198.13360595703125, "rewards/margins": 70.08900451660156, "rewards/real": -128.04458618164062, "step": 50 }, { "epoch": 0.11, "grad_norm": 11.308270248503243, "learning_rate": 4.999399520797532e-07, "logits/generated": -2.2724039554595947, "logits/oppo_generated": -2.718198776245117, "logits/oppo_real": -2.615701675415039, "logits/real": -2.3728127479553223, "logps/generated": -255.1387939453125, "logps/oppo_gen": -69.3196792602539, "logps/oppo_real": -302.4152526855469, "logps/real": -413.5482177734375, "loss": 1.1335, "loss/gen": 0.5399003028869629, "loss/real": 0.5259116888046265, "rewards/accuracies": 1.0, "rewards/generated": -185.81912231445312, "rewards/margins": 74.68618774414062, "rewards/real": -111.1329345703125, "step": 51 }, { "epoch": 0.11, "grad_norm": 12.176337661733065, "learning_rate": 4.998932514657231e-07, "logits/generated": -2.3679566383361816, "logits/oppo_generated": -2.7421092987060547, "logits/oppo_real": -2.7155723571777344, "logits/real": -2.4217591285705566, "logps/generated": -250.55404663085938, "logps/oppo_gen": -69.50028228759766, "logps/oppo_real": -243.26260375976562, "logps/real": -365.6922607421875, "loss": 1.137, "loss/gen": 0.5619360208511353, "loss/real": 0.57623291015625, "rewards/accuracies": 0.875, "rewards/generated": -181.05377197265625, "rewards/margins": 58.624122619628906, "rewards/real": -122.42965698242188, "step": 52 }, { "epoch": 0.11, "grad_norm": 10.84681504435497, "learning_rate": 4.998332120925598e-07, "logits/generated": -2.3570823669433594, "logits/oppo_generated": -2.680886745452881, "logits/oppo_real": -2.738534688949585, "logits/real": -2.3443045616149902, "logps/generated": -245.57504272460938, "logps/oppo_gen": -61.518577575683594, "logps/oppo_real": -214.97161865234375, "logps/real": -337.97381591796875, "loss": 1.1329, "loss/gen": 0.5838844776153564, "loss/real": 0.5817942023277283, "rewards/accuracies": 0.9375, "rewards/generated": -184.05645751953125, "rewards/margins": 61.05426788330078, "rewards/real": -123.002197265625, "step": 53 }, { "epoch": 0.11, "grad_norm": 9.795093818593013, "learning_rate": 4.997598371650346e-07, "logits/generated": -2.291536808013916, "logits/oppo_generated": -2.534092903137207, "logits/oppo_real": -2.636704921722412, "logits/real": -2.234243869781494, "logps/generated": -295.8362121582031, "logps/oppo_gen": -95.54362487792969, "logps/oppo_real": -232.2601318359375, "logps/real": -351.40863037109375, "loss": 1.0871, "loss/gen": 0.543353796005249, "loss/real": 0.5595182776451111, "rewards/accuracies": 1.0, "rewards/generated": -200.2926025390625, "rewards/margins": 81.14410400390625, "rewards/real": -119.14847564697266, "step": 54 }, { "epoch": 0.12, "grad_norm": 11.960172044416053, "learning_rate": 4.996731305997416e-07, "logits/generated": -2.40325665473938, "logits/oppo_generated": -2.7552638053894043, "logits/oppo_real": -2.7744545936584473, "logits/real": -2.4595704078674316, "logps/generated": -263.9963073730469, "logps/oppo_gen": -73.20036315917969, "logps/oppo_real": -265.6083679199219, "logps/real": -375.6069641113281, "loss": 1.0676, "loss/gen": 0.5453953742980957, "loss/real": 0.5264946222305298, "rewards/accuracies": 0.875, "rewards/generated": -190.79592895507812, "rewards/margins": 80.79731750488281, "rewards/real": -109.99860382080078, "step": 55 }, { "epoch": 0.12, "grad_norm": 14.116545991408064, "learning_rate": 4.995730970248893e-07, "logits/generated": -2.3119473457336426, "logits/oppo_generated": -2.819808006286621, "logits/oppo_real": -2.7743167877197266, "logits/real": -2.46488094329834, "logps/generated": -294.59149169921875, "logps/oppo_gen": -84.80085754394531, "logps/oppo_real": -403.3960266113281, "logps/real": -471.69610595703125, "loss": 1.0333, "loss/gen": 0.48806941509246826, "loss/real": 0.38371366262435913, "rewards/accuracies": 1.0, "rewards/generated": -209.79061889648438, "rewards/margins": 141.4905548095703, "rewards/real": -68.30006408691406, "step": 56 }, { "epoch": 0.12, "grad_norm": 11.833621885694749, "learning_rate": 4.994597417800523e-07, "logits/generated": -2.383392572402954, "logits/oppo_generated": -2.7924365997314453, "logits/oppo_real": -2.751426935195923, "logits/real": -2.4370484352111816, "logps/generated": -281.76629638671875, "logps/oppo_gen": -79.0185775756836, "logps/oppo_real": -225.24346923828125, "logps/real": -324.58514404296875, "loss": 1.035, "loss/gen": 0.48555830121040344, "loss/real": 0.48214584589004517, "rewards/accuracies": 1.0, "rewards/generated": -202.74771118164062, "rewards/margins": 103.40604400634766, "rewards/real": -99.34165954589844, "step": 57 }, { "epoch": 0.12, "grad_norm": 10.633234759823537, "learning_rate": 4.993330709158879e-07, "logits/generated": -2.333850145339966, "logits/oppo_generated": -2.8365046977996826, "logits/oppo_real": -2.7985291481018066, "logits/real": -2.4481301307678223, "logps/generated": -275.4985046386719, "logps/oppo_gen": -82.01077270507812, "logps/oppo_real": -335.5836486816406, "logps/real": -424.78973388671875, "loss": 0.992, "loss/gen": 0.5070739388465881, "loss/real": 0.4416646659374237, "rewards/accuracies": 1.0, "rewards/generated": -193.4877471923828, "rewards/margins": 104.28164672851562, "rewards/real": -89.20610046386719, "step": 58 }, { "epoch": 0.12, "grad_norm": 10.303258835881012, "learning_rate": 4.991930911938115e-07, "logits/generated": -2.3403124809265137, "logits/oppo_generated": -2.5403761863708496, "logits/oppo_real": -2.8262884616851807, "logits/real": -2.159945487976074, "logps/generated": -257.6687316894531, "logps/oppo_gen": -65.96223449707031, "logps/oppo_real": -207.23001098632812, "logps/real": -311.77996826171875, "loss": 0.9855, "loss/gen": 0.5228403806686401, "loss/real": 0.5156495571136475, "rewards/accuracies": 0.9375, "rewards/generated": -191.70651245117188, "rewards/margins": 87.15653991699219, "rewards/real": -104.54997253417969, "step": 59 }, { "epoch": 0.13, "grad_norm": 10.776866961748691, "learning_rate": 4.990398100856366e-07, "logits/generated": -2.2386527061462402, "logits/oppo_generated": -2.676912307739258, "logits/oppo_real": -2.7533621788024902, "logits/real": -2.286830186843872, "logps/generated": -357.25042724609375, "logps/oppo_gen": -91.29155731201172, "logps/oppo_real": -219.24154663085938, "logps/real": -348.81939697265625, "loss": 0.9867, "loss/gen": 0.3898318409919739, "loss/real": 0.6121199727058411, "rewards/accuracies": 1.0, "rewards/generated": -265.9588623046875, "rewards/margins": 136.38104248046875, "rewards/real": -129.57781982421875, "step": 60 }, { "epoch": 0.13, "grad_norm": 11.787518278170479, "learning_rate": 4.988732357731762e-07, "logits/generated": -2.2177071571350098, "logits/oppo_generated": -2.8242249488830566, "logits/oppo_real": -2.6713385581970215, "logits/real": -2.442739963531494, "logps/generated": -307.24603271484375, "logps/oppo_gen": -86.80853271484375, "logps/oppo_real": -296.34722900390625, "logps/real": -410.3054504394531, "loss": 0.9475, "loss/gen": 0.4432734251022339, "loss/real": 0.5487948656082153, "rewards/accuracies": 1.0, "rewards/generated": -220.4375, "rewards/margins": 106.47928619384766, "rewards/real": -113.95822143554688, "step": 61 }, { "epoch": 0.13, "grad_norm": 10.84077487554943, "learning_rate": 4.986933771478051e-07, "logits/generated": -2.376708984375, "logits/oppo_generated": -2.7960495948791504, "logits/oppo_real": -2.914412260055542, "logits/real": -2.3897695541381836, "logps/generated": -300.8831787109375, "logps/oppo_gen": -86.83887481689453, "logps/oppo_real": -453.4812927246094, "logps/real": -554.751953125, "loss": 0.9287, "loss/gen": 0.4425833523273468, "loss/real": 0.5028396248817444, "rewards/accuracies": 0.875, "rewards/generated": -214.0443115234375, "rewards/margins": 112.77363586425781, "rewards/real": -101.27067565917969, "step": 62 }, { "epoch": 0.13, "grad_norm": 11.885957501092347, "learning_rate": 4.985002438099865e-07, "logits/generated": -2.196812629699707, "logits/oppo_generated": -2.7576608657836914, "logits/oppo_real": -2.674570083618164, "logits/real": -2.3408308029174805, "logps/generated": -318.3380126953125, "logps/oppo_gen": -76.6629409790039, "logps/oppo_real": -243.74818420410156, "logps/real": -356.16204833984375, "loss": 0.9311, "loss/gen": 0.3887555003166199, "loss/real": 0.5405441522598267, "rewards/accuracies": 0.9375, "rewards/generated": -241.6750946044922, "rewards/margins": 129.26123046875, "rewards/real": -112.41386413574219, "step": 63 }, { "epoch": 0.13, "grad_norm": 19.176210542785817, "learning_rate": 4.982938460687582e-07, "logits/generated": -2.14847469329834, "logits/oppo_generated": -2.658493995666504, "logits/oppo_real": -2.693326950073242, "logits/real": -2.232679843902588, "logps/generated": -289.572509765625, "logps/oppo_gen": -74.29948425292969, "logps/oppo_real": -365.4249267578125, "logps/real": -468.91778564453125, "loss": 1.0004, "loss/gen": 0.4416448771953583, "loss/real": 0.5346858501434326, "rewards/accuracies": 0.9375, "rewards/generated": -215.27301025390625, "rewards/margins": 111.78018188476562, "rewards/real": -103.4928207397461, "step": 64 }, { "epoch": 0.14, "grad_norm": 12.604560850090447, "learning_rate": 4.980741949411839e-07, "logits/generated": -2.200733184814453, "logits/oppo_generated": -2.8042337894439697, "logits/oppo_real": -2.743964195251465, "logits/real": -2.2919199466705322, "logps/generated": -308.5362548828125, "logps/oppo_gen": -76.34394073486328, "logps/oppo_real": -196.71514892578125, "logps/real": -299.24163818359375, "loss": 0.9107, "loss/gen": 0.439577579498291, "loss/real": 0.5086124539375305, "rewards/accuracies": 1.0, "rewards/generated": -232.19229125976562, "rewards/margins": 129.66580200195312, "rewards/real": -102.5264892578125, "step": 65 }, { "epoch": 0.14, "grad_norm": 14.953938779939541, "learning_rate": 4.978413021517633e-07, "logits/generated": -2.2938356399536133, "logits/oppo_generated": -2.7946197986602783, "logits/oppo_real": -2.7596311569213867, "logits/real": -2.365877389907837, "logps/generated": -342.2618713378906, "logps/oppo_gen": -103.196044921875, "logps/oppo_real": -267.36993408203125, "logps/real": -376.911376953125, "loss": 0.892, "loss/gen": 0.3898586630821228, "loss/real": 0.5500133037567139, "rewards/accuracies": 0.9375, "rewards/generated": -239.06582641601562, "rewards/margins": 129.5243682861328, "rewards/real": -109.54145812988281, "step": 66 }, { "epoch": 0.14, "grad_norm": 20.73778569701294, "learning_rate": 4.975951801318083e-07, "logits/generated": -2.0994839668273926, "logits/oppo_generated": -2.7236528396606445, "logits/oppo_real": -2.8300952911376953, "logits/real": -2.186398506164551, "logps/generated": -274.72259521484375, "logps/oppo_gen": -66.93666076660156, "logps/oppo_real": -370.18499755859375, "logps/real": -475.4491882324219, "loss": 0.8971, "loss/gen": 0.4556940495967865, "loss/real": 0.5313321948051453, "rewards/accuracies": 0.9375, "rewards/generated": -207.78594970703125, "rewards/margins": 102.52177429199219, "rewards/real": -105.26417541503906, "step": 67 }, { "epoch": 0.14, "grad_norm": 17.5792161119695, "learning_rate": 4.973358420187775e-07, "logits/generated": -2.248991012573242, "logits/oppo_generated": -2.843545436859131, "logits/oppo_real": -2.811431407928467, "logits/real": -2.3350868225097656, "logps/generated": -321.4705505371094, "logps/oppo_gen": -81.84077453613281, "logps/oppo_real": -353.8198547363281, "logps/real": -448.204345703125, "loss": 0.8387, "loss/gen": 0.3652092218399048, "loss/real": 0.4975828528404236, "rewards/accuracies": 0.9375, "rewards/generated": -239.62977600097656, "rewards/margins": 145.24526977539062, "rewards/real": -94.384521484375, "step": 68 }, { "epoch": 0.14, "grad_norm": 34.968975158457425, "learning_rate": 4.970633016555764e-07, "logits/generated": -2.250572681427002, "logits/oppo_generated": -2.743389368057251, "logits/oppo_real": -2.8539438247680664, "logits/real": -2.277600049972534, "logps/generated": -332.234375, "logps/oppo_gen": -81.96417236328125, "logps/oppo_real": -289.67822265625, "logps/real": -347.64324951171875, "loss": 0.8749, "loss/gen": 0.4391787052154541, "loss/real": 0.34725236892700195, "rewards/accuracies": 1.0, "rewards/generated": -250.27023315429688, "rewards/margins": 192.30517578125, "rewards/real": -57.96504211425781, "step": 69 }, { "epoch": 0.15, "grad_norm": 21.457009560452356, "learning_rate": 4.967775735898179e-07, "logits/generated": -2.1518430709838867, "logits/oppo_generated": -2.724855422973633, "logits/oppo_real": -2.7126691341400146, "logits/real": -2.200214147567749, "logps/generated": -317.98663330078125, "logps/oppo_gen": -72.91289520263672, "logps/oppo_real": -194.3795623779297, "logps/real": -256.5523681640625, "loss": 0.829, "loss/gen": 0.38895365595817566, "loss/real": 0.3911527395248413, "rewards/accuracies": 1.0, "rewards/generated": -245.07374572753906, "rewards/margins": 182.90090942382812, "rewards/real": -62.17283630371094, "step": 70 }, { "epoch": 0.15, "grad_norm": 16.85674377781829, "learning_rate": 4.964786730730454e-07, "logits/generated": -1.9712395668029785, "logits/oppo_generated": -2.6243536472320557, "logits/oppo_real": -2.568087100982666, "logits/real": -2.1335487365722656, "logps/generated": -288.0013732910156, "logps/oppo_gen": -69.08726501464844, "logps/oppo_real": -247.77418518066406, "logps/real": -316.950927734375, "loss": 0.8361, "loss/gen": 0.42092961072921753, "loss/real": 0.40566056966781616, "rewards/accuracies": 0.9375, "rewards/generated": -218.91412353515625, "rewards/margins": 149.7374267578125, "rewards/real": -69.17670440673828, "step": 71 }, { "epoch": 0.15, "grad_norm": 18.339407359690973, "learning_rate": 4.961666160599197e-07, "logits/generated": -2.207213878631592, "logits/oppo_generated": -2.8543405532836914, "logits/oppo_real": -2.865431785583496, "logits/real": -2.299715757369995, "logps/generated": -325.899658203125, "logps/oppo_gen": -78.86032104492188, "logps/oppo_real": -285.64013671875, "logps/real": -355.99383544921875, "loss": 0.8282, "loss/gen": 0.3996672034263611, "loss/real": 0.41223788261413574, "rewards/accuracies": 0.9375, "rewards/generated": -247.03933715820312, "rewards/margins": 176.6856689453125, "rewards/real": -70.35367584228516, "step": 72 }, { "epoch": 0.15, "grad_norm": 20.965985316297516, "learning_rate": 4.958414192073665e-07, "logits/generated": -2.016292095184326, "logits/oppo_generated": -2.547126531600952, "logits/oppo_real": -2.573683738708496, "logits/real": -2.0639572143554688, "logps/generated": -343.04656982421875, "logps/oppo_gen": -78.18771362304688, "logps/oppo_real": -325.4617614746094, "logps/real": -398.49114990234375, "loss": 0.8034, "loss/gen": 0.39308467507362366, "loss/real": 0.4334869384765625, "rewards/accuracies": 1.0, "rewards/generated": -264.8587951660156, "rewards/margins": 191.82943725585938, "rewards/real": -73.02937316894531, "step": 73 }, { "epoch": 0.15, "grad_norm": 17.53030039345492, "learning_rate": 4.955030998736876e-07, "logits/generated": -2.1453638076782227, "logits/oppo_generated": -2.717832326889038, "logits/oppo_real": -2.810462474822998, "logits/real": -2.1777830123901367, "logps/generated": -310.4494323730469, "logps/oppo_gen": -75.6651382446289, "logps/oppo_real": -284.924072265625, "logps/real": -360.6187744140625, "loss": 0.8214, "loss/gen": 0.37556761503219604, "loss/real": 0.45106858015060425, "rewards/accuracies": 1.0, "rewards/generated": -234.78427124023438, "rewards/margins": 159.08956909179688, "rewards/real": -75.69470977783203, "step": 74 }, { "epoch": 0.16, "grad_norm": 17.718348691994233, "learning_rate": 4.951516761176343e-07, "logits/generated": -2.3389203548431396, "logits/oppo_generated": -3.063918113708496, "logits/oppo_real": -3.1160035133361816, "logits/real": -2.4417004585266113, "logps/generated": -420.660400390625, "logps/oppo_gen": -91.83782958984375, "logps/oppo_real": -399.8033447265625, "logps/real": -444.0775451660156, "loss": 0.7504, "loss/gen": 0.2812493145465851, "loss/real": 0.33059465885162354, "rewards/accuracies": 1.0, "rewards/generated": -328.8226318359375, "rewards/margins": 284.54840087890625, "rewards/real": -44.274192810058594, "step": 75 }, { "epoch": 0.16, "grad_norm": 17.563640733916134, "learning_rate": 4.947871666974437e-07, "logits/generated": -1.8955752849578857, "logits/oppo_generated": -2.6598410606384277, "logits/oppo_real": -2.6542224884033203, "logits/real": -2.035309314727783, "logps/generated": -412.85662841796875, "logps/oppo_gen": -67.25988006591797, "logps/oppo_real": -283.60589599609375, "logps/real": -359.90924072265625, "loss": 0.7995, "loss/gen": 0.4136648178100586, "loss/real": 0.45442986488342285, "rewards/accuracies": 0.9375, "rewards/generated": -345.59674072265625, "rewards/margins": 269.29339599609375, "rewards/real": -76.3033447265625, "step": 76 }, { "epoch": 0.16, "grad_norm": 21.040827708625496, "learning_rate": 4.944095910698372e-07, "logits/generated": -2.041642189025879, "logits/oppo_generated": -2.7393393516540527, "logits/oppo_real": -2.783273458480835, "logits/real": -2.1023669242858887, "logps/generated": -362.0621643066406, "logps/oppo_gen": -127.53181457519531, "logps/oppo_real": -495.255859375, "logps/real": -547.227294921875, "loss": 0.754, "loss/gen": 0.3859551250934601, "loss/real": 0.3337196409702301, "rewards/accuracies": 1.0, "rewards/generated": -234.53038024902344, "rewards/margins": 182.55897521972656, "rewards/real": -51.97139358520508, "step": 77 }, { "epoch": 0.16, "grad_norm": 18.349379759240588, "learning_rate": 4.940189693889818e-07, "logits/generated": -1.6511880159378052, "logits/oppo_generated": -2.492225408554077, "logits/oppo_real": -2.408968210220337, "logits/real": -1.7214398384094238, "logps/generated": -343.68011474609375, "logps/oppo_gen": -74.83660888671875, "logps/oppo_real": -254.16744995117188, "logps/real": -314.009521484375, "loss": 0.7448, "loss/gen": 0.28272247314453125, "loss/real": 0.38186439871788025, "rewards/accuracies": 1.0, "rewards/generated": -268.843505859375, "rewards/margins": 209.00146484375, "rewards/real": -59.842044830322266, "step": 78 }, { "epoch": 0.17, "grad_norm": 32.05865062784341, "learning_rate": 4.936153225054146e-07, "logits/generated": -1.8036625385284424, "logits/oppo_generated": -2.690124988555908, "logits/oppo_real": -2.757858991622925, "logits/real": -1.9343822002410889, "logps/generated": -343.80975341796875, "logps/oppo_gen": -73.35165405273438, "logps/oppo_real": -301.7453918457031, "logps/real": -369.839599609375, "loss": 0.7582, "loss/gen": 0.2956180274486542, "loss/real": 0.4004019796848297, "rewards/accuracies": 1.0, "rewards/generated": -270.45806884765625, "rewards/margins": 202.3638458251953, "rewards/real": -68.09424591064453, "step": 79 }, { "epoch": 0.17, "grad_norm": 15.991292491442804, "learning_rate": 4.931986719649298e-07, "logits/generated": -1.7580184936523438, "logits/oppo_generated": -2.8001205921173096, "logits/oppo_real": -2.768902540206909, "logits/real": -1.9494829177856445, "logps/generated": -286.70635986328125, "logps/oppo_gen": -60.519004821777344, "logps/oppo_real": -291.74224853515625, "logps/real": -375.76318359375, "loss": 0.7548, "loss/gen": 0.430566668510437, "loss/real": 0.5071053504943848, "rewards/accuracies": 0.9375, "rewards/generated": -226.18736267089844, "rewards/margins": 142.16641235351562, "rewards/real": -84.02093505859375, "step": 80 }, { "epoch": 0.17, "grad_norm": 35.05248053421558, "learning_rate": 4.927690400074286e-07, "logits/generated": -1.9403841495513916, "logits/oppo_generated": -2.788212776184082, "logits/oppo_real": -2.712280750274658, "logits/real": -2.0578057765960693, "logps/generated": -382.52056884765625, "logps/oppo_gen": -89.67400360107422, "logps/oppo_real": -264.5151672363281, "logps/real": -327.13836669921875, "loss": 0.8402, "loss/gen": 0.41186997294425964, "loss/real": 0.36961644887924194, "rewards/accuracies": 0.9375, "rewards/generated": -292.8465576171875, "rewards/margins": 230.22337341308594, "rewards/real": -62.62320327758789, "step": 81 }, { "epoch": 0.17, "grad_norm": 30.86921322604955, "learning_rate": 4.923264495657319e-07, "logits/generated": -1.8421218395233154, "logits/oppo_generated": -2.6853179931640625, "logits/oppo_real": -2.8702688217163086, "logits/real": -1.8912787437438965, "logps/generated": -324.0174560546875, "logps/oppo_gen": -78.56639099121094, "logps/oppo_real": -319.54541015625, "logps/real": -412.1978759765625, "loss": 0.8296, "loss/gen": 0.3521851897239685, "loss/real": 0.5055232644081116, "rewards/accuracies": 1.0, "rewards/generated": -245.4510498046875, "rewards/margins": 152.798583984375, "rewards/real": -92.65247344970703, "step": 82 }, { "epoch": 0.17, "grad_norm": 20.8992282268632, "learning_rate": 4.918709242643563e-07, "logits/generated": -1.789534330368042, "logits/oppo_generated": -2.82261323928833, "logits/oppo_real": -2.704563617706299, "logits/real": -1.9741158485412598, "logps/generated": -350.99957275390625, "logps/oppo_gen": -68.2770767211914, "logps/oppo_real": -239.40855407714844, "logps/real": -317.4848937988281, "loss": 0.756, "loss/gen": 0.3403918147087097, "loss/real": 0.4584280252456665, "rewards/accuracies": 0.9375, "rewards/generated": -282.72247314453125, "rewards/margins": 204.64617919921875, "rewards/real": -78.07632446289062, "step": 83 }, { "epoch": 0.18, "grad_norm": 25.62434021635362, "learning_rate": 4.914024884182534e-07, "logits/generated": -1.859442114830017, "logits/oppo_generated": -2.74169921875, "logits/oppo_real": -2.7226579189300537, "logits/real": -1.9540834426879883, "logps/generated": -345.9833068847656, "logps/oppo_gen": -60.66720962524414, "logps/oppo_real": -155.5846405029297, "logps/real": -249.47677612304688, "loss": 0.7579, "loss/gen": 0.3146398961544037, "loss/real": 0.5418342351913452, "rewards/accuracies": 1.0, "rewards/generated": -285.31610107421875, "rewards/margins": 191.4239501953125, "rewards/real": -93.89213562011719, "step": 84 }, { "epoch": 0.18, "grad_norm": 30.52967866350504, "learning_rate": 4.909211670315114e-07, "logits/generated": -1.861513376235962, "logits/oppo_generated": -2.698122501373291, "logits/oppo_real": -2.8987860679626465, "logits/real": -1.883429765701294, "logps/generated": -399.5763244628906, "logps/oppo_gen": -75.33815002441406, "logps/oppo_real": -307.6024169921875, "logps/real": -379.11126708984375, "loss": 0.7771, "loss/gen": 0.29335951805114746, "loss/real": 0.470558226108551, "rewards/accuracies": 0.9375, "rewards/generated": -324.2381591796875, "rewards/margins": 252.7292938232422, "rewards/real": -71.50885772705078, "step": 85 }, { "epoch": 0.18, "grad_norm": 25.632891077294982, "learning_rate": 4.904269857960208e-07, "logits/generated": -1.8649256229400635, "logits/oppo_generated": -2.7086257934570312, "logits/oppo_real": -2.7736144065856934, "logits/real": -1.9235105514526367, "logps/generated": -334.47784423828125, "logps/oppo_gen": -64.02923583984375, "logps/oppo_real": -219.5755157470703, "logps/real": -252.96922302246094, "loss": 0.7185, "loss/gen": 0.3252614736557007, "loss/real": 0.29650163650512695, "rewards/accuracies": 1.0, "rewards/generated": -270.4486083984375, "rewards/margins": 237.05490112304688, "rewards/real": -33.393707275390625, "step": 86 }, { "epoch": 0.18, "grad_norm": 23.153695772590766, "learning_rate": 4.899199710901028e-07, "logits/generated": -1.6987268924713135, "logits/oppo_generated": -2.6510324478149414, "logits/oppo_real": -2.6278481483459473, "logits/real": -1.944386601448059, "logps/generated": -371.7005615234375, "logps/oppo_gen": -90.28435516357422, "logps/oppo_real": -405.62408447265625, "logps/real": -452.80035400390625, "loss": 0.7195, "loss/gen": 0.3139500916004181, "loss/real": 0.33689984679222107, "rewards/accuracies": 0.9375, "rewards/generated": -281.41619873046875, "rewards/margins": 234.23989868164062, "rewards/real": -47.17626190185547, "step": 87 }, { "epoch": 0.18, "grad_norm": 29.933416099971993, "learning_rate": 4.894001499771015e-07, "logits/generated": -1.8620039224624634, "logits/oppo_generated": -2.7284958362579346, "logits/oppo_real": -2.8318753242492676, "logits/real": -1.9367091655731201, "logps/generated": -318.0210266113281, "logps/oppo_gen": -71.02044677734375, "logps/oppo_real": -323.9693603515625, "logps/real": -344.6488037109375, "loss": 0.7011, "loss/gen": 0.34499967098236084, "loss/real": 0.25943779945373535, "rewards/accuracies": 1.0, "rewards/generated": -247.00057983398438, "rewards/margins": 226.32110595703125, "rewards/real": -20.679473876953125, "step": 88 }, { "epoch": 0.19, "grad_norm": 30.951742696923585, "learning_rate": 4.888675502039391e-07, "logits/generated": -1.5965911149978638, "logits/oppo_generated": -2.762685537338257, "logits/oppo_real": -2.5824837684631348, "logits/real": -1.8686351776123047, "logps/generated": -281.6763000488281, "logps/oppo_gen": -63.627159118652344, "logps/oppo_real": -235.63560485839844, "logps/real": -290.3897705078125, "loss": 0.7559, "loss/gen": 0.48157230019569397, "loss/real": 0.40050894021987915, "rewards/accuracies": 0.9375, "rewards/generated": -218.0491180419922, "rewards/margins": 163.29498291015625, "rewards/real": -54.75415802001953, "step": 89 }, { "epoch": 0.19, "grad_norm": 24.58686594608555, "learning_rate": 4.883222001996351e-07, "logits/generated": -1.3386802673339844, "logits/oppo_generated": -2.5235419273376465, "logits/oppo_real": -2.3336281776428223, "logits/real": -1.5898027420043945, "logps/generated": -390.20355224609375, "logps/oppo_gen": -76.78201293945312, "logps/oppo_real": -216.29495239257812, "logps/real": -296.91888427734375, "loss": 0.7039, "loss/gen": 0.25537049770355225, "loss/real": 0.4638826549053192, "rewards/accuracies": 0.9375, "rewards/generated": -313.4215393066406, "rewards/margins": 232.797607421875, "rewards/real": -80.62393188476562, "step": 90 }, { "epoch": 0.19, "grad_norm": 23.052282204670107, "learning_rate": 4.877641290737883e-07, "logits/generated": -1.6338614225387573, "logits/oppo_generated": -2.6937649250030518, "logits/oppo_real": -2.8629989624023438, "logits/real": -1.6108810901641846, "logps/generated": -389.9326171875, "logps/oppo_gen": -72.10958862304688, "logps/oppo_real": -217.7257537841797, "logps/real": -288.9909973144531, "loss": 0.7187, "loss/gen": 0.26666751503944397, "loss/real": 0.4401911199092865, "rewards/accuracies": 1.0, "rewards/generated": -317.822998046875, "rewards/margins": 246.55776977539062, "rewards/real": -71.26525115966797, "step": 91 }, { "epoch": 0.19, "grad_norm": 20.153327670906794, "learning_rate": 4.871933666150239e-07, "logits/generated": -1.5877046585083008, "logits/oppo_generated": -2.700439453125, "logits/oppo_real": -2.8355603218078613, "logits/real": -1.638826847076416, "logps/generated": -360.88323974609375, "logps/oppo_gen": -71.94976806640625, "logps/oppo_real": -317.441650390625, "logps/real": -369.97760009765625, "loss": 0.7454, "loss/gen": 0.26692843437194824, "loss/real": 0.3744610548019409, "rewards/accuracies": 0.9375, "rewards/generated": -288.9334716796875, "rewards/margins": 236.39752197265625, "rewards/real": -52.53595733642578, "step": 92 }, { "epoch": 0.19, "grad_norm": 26.06467902499053, "learning_rate": 4.866099432894024e-07, "logits/generated": -1.3850927352905273, "logits/oppo_generated": -2.61204195022583, "logits/oppo_real": -2.505739212036133, "logits/real": -1.6025258302688599, "logps/generated": -427.29150390625, "logps/oppo_gen": -96.4445571899414, "logps/oppo_real": -350.4456787109375, "logps/real": -403.288818359375, "loss": 0.7432, "loss/gen": 0.22171354293823242, "loss/real": 0.3635830879211426, "rewards/accuracies": 1.0, "rewards/generated": -330.846923828125, "rewards/margins": 278.0038146972656, "rewards/real": -52.843109130859375, "step": 93 }, { "epoch": 0.2, "grad_norm": 38.3231737909853, "learning_rate": 4.860138902387939e-07, "logits/generated": -1.5998806953430176, "logits/oppo_generated": -2.6566057205200195, "logits/oppo_real": -2.782456874847412, "logits/real": -1.706424593925476, "logps/generated": -365.70166015625, "logps/oppo_gen": -71.42315673828125, "logps/oppo_real": -228.60372924804688, "logps/real": -270.2125244140625, "loss": 0.673, "loss/gen": 0.3238295912742615, "loss/real": 0.32134318351745605, "rewards/accuracies": 0.9375, "rewards/generated": -294.27850341796875, "rewards/margins": 252.66970825195312, "rewards/real": -41.60879135131836, "step": 94 }, { "epoch": 0.2, "grad_norm": 14.568084300970918, "learning_rate": 4.854052392792161e-07, "logits/generated": -1.7710530757904053, "logits/oppo_generated": -2.799525499343872, "logits/oppo_real": -2.799852132797241, "logits/real": -1.9224238395690918, "logps/generated": -359.0024108886719, "logps/oppo_gen": -80.63153076171875, "logps/oppo_real": -294.978759765625, "logps/real": -347.17596435546875, "loss": 0.6688, "loss/gen": 0.2978823482990265, "loss/real": 0.34458765387535095, "rewards/accuracies": 1.0, "rewards/generated": -278.370849609375, "rewards/margins": 226.17367553710938, "rewards/real": -52.19721221923828, "step": 95 }, { "epoch": 0.2, "grad_norm": 22.049713838757494, "learning_rate": 4.847840228991356e-07, "logits/generated": -1.7129011154174805, "logits/oppo_generated": -2.7052841186523438, "logits/oppo_real": -2.9764838218688965, "logits/real": -1.8225359916687012, "logps/generated": -321.9461669921875, "logps/oppo_gen": -64.55047607421875, "logps/oppo_real": -324.9617004394531, "logps/real": -359.5166320800781, "loss": 0.7227, "loss/gen": 0.34642934799194336, "loss/real": 0.3209618330001831, "rewards/accuracies": 0.9375, "rewards/generated": -257.39569091796875, "rewards/margins": 222.84072875976562, "rewards/real": -34.55495834350586, "step": 96 }, { "epoch": 0.2, "grad_norm": 22.99331169737191, "learning_rate": 4.841502742577338e-07, "logits/generated": -1.6851990222930908, "logits/oppo_generated": -2.788656234741211, "logits/oppo_real": -2.713801383972168, "logits/real": -1.8722307682037354, "logps/generated": -351.01678466796875, "logps/oppo_gen": -59.00885009765625, "logps/oppo_real": -176.6733856201172, "logps/real": -210.3897705078125, "loss": 0.6606, "loss/gen": 0.41348496079444885, "loss/real": 0.29584217071533203, "rewards/accuracies": 1.0, "rewards/generated": -292.0079650878906, "rewards/margins": 258.2915344238281, "rewards/real": -33.716373443603516, "step": 97 }, { "epoch": 0.21, "grad_norm": 34.593367135979804, "learning_rate": 4.83504027183137e-07, "logits/generated": -1.626028060913086, "logits/oppo_generated": -2.680184841156006, "logits/oppo_real": -2.639242649078369, "logits/real": -1.781846523284912, "logps/generated": -338.901611328125, "logps/oppo_gen": -63.749298095703125, "logps/oppo_real": -225.60980224609375, "logps/real": -232.88375854492188, "loss": 0.6294, "loss/gen": 0.3008587062358856, "loss/real": 0.2209412306547165, "rewards/accuracies": 1.0, "rewards/generated": -275.15234375, "rewards/margins": 267.8783874511719, "rewards/real": -7.2739667892456055, "step": 98 }, { "epoch": 0.21, "grad_norm": 24.448282268073047, "learning_rate": 4.828453161706108e-07, "logits/generated": -1.6220470666885376, "logits/oppo_generated": -2.674605369567871, "logits/oppo_real": -2.5424935817718506, "logits/real": -1.8579437732696533, "logps/generated": -359.24969482421875, "logps/oppo_gen": -88.02183532714844, "logps/oppo_real": -287.251953125, "logps/real": -309.96124267578125, "loss": 0.6726, "loss/gen": 0.3974546194076538, "loss/real": 0.28890979290008545, "rewards/accuracies": 1.0, "rewards/generated": -271.22784423828125, "rewards/margins": 248.51858520507812, "rewards/real": -22.709251403808594, "step": 99 }, { "epoch": 0.21, "grad_norm": 28.265135904765145, "learning_rate": 4.821741763807186e-07, "logits/generated": -1.6087634563446045, "logits/oppo_generated": -2.742450714111328, "logits/oppo_real": -2.764233350753784, "logits/real": -1.770212173461914, "logps/generated": -427.012939453125, "logps/oppo_gen": -84.33467102050781, "logps/oppo_real": -317.5994873046875, "logps/real": -369.8688049316406, "loss": 0.6712, "loss/gen": 0.24097959697246552, "loss/real": 0.36573004722595215, "rewards/accuracies": 0.9375, "rewards/generated": -342.6782531738281, "rewards/margins": 290.408935546875, "rewards/real": -52.26934051513672, "step": 100 }, { "epoch": 0.21, "grad_norm": 31.218570132114476, "learning_rate": 4.81490643637445e-07, "logits/generated": -1.654707908630371, "logits/oppo_generated": -2.7942566871643066, "logits/oppo_real": -2.9361443519592285, "logits/real": -1.663029432296753, "logps/generated": -362.0345458984375, "logps/oppo_gen": -62.20787048339844, "logps/oppo_real": -250.55889892578125, "logps/real": -306.2510681152344, "loss": 0.6456, "loss/gen": 0.23637422919273376, "loss/real": 0.411770224571228, "rewards/accuracies": 1.0, "rewards/generated": -299.8266906738281, "rewards/margins": 244.13455200195312, "rewards/real": -55.69215393066406, "step": 101 }, { "epoch": 0.21, "grad_norm": 18.808293984501145, "learning_rate": 4.807947544262838e-07, "logits/generated": -1.5722711086273193, "logits/oppo_generated": -2.7636303901672363, "logits/oppo_real": -2.752176284790039, "logits/real": -1.6461234092712402, "logps/generated": -319.77374267578125, "logps/oppo_gen": -56.84593963623047, "logps/oppo_real": -185.9375457763672, "logps/real": -256.1296691894531, "loss": 0.661, "loss/gen": 0.32895606756210327, "loss/real": 0.4661652743816376, "rewards/accuracies": 0.9375, "rewards/generated": -262.9277648925781, "rewards/margins": 192.73565673828125, "rewards/real": -70.19212341308594, "step": 102 }, { "epoch": 0.22, "grad_norm": 16.356918952156665, "learning_rate": 4.800865458922898e-07, "logits/generated": -1.1754525899887085, "logits/oppo_generated": -2.619927406311035, "logits/oppo_real": -2.643867015838623, "logits/real": -1.3315857648849487, "logps/generated": -441.89556884765625, "logps/oppo_gen": -69.87133026123047, "logps/oppo_real": -264.7545166015625, "logps/real": -286.8203430175781, "loss": 0.6036, "loss/gen": 0.3638154864311218, "loss/real": 0.2782540023326874, "rewards/accuracies": 1.0, "rewards/generated": -372.0242004394531, "rewards/margins": 349.9583435058594, "rewards/real": -22.065845489501953, "step": 103 }, { "epoch": 0.22, "grad_norm": 33.94130891954338, "learning_rate": 4.793660558380969e-07, "logits/generated": -1.2376275062561035, "logits/oppo_generated": -2.6367125511169434, "logits/oppo_real": -2.6506056785583496, "logits/real": -1.3006834983825684, "logps/generated": -473.9594421386719, "logps/oppo_gen": -74.07377624511719, "logps/oppo_real": -268.19134521484375, "logps/real": -330.2340087890625, "loss": 0.7282, "loss/gen": 0.20278650522232056, "loss/real": 0.44767656922340393, "rewards/accuracies": 1.0, "rewards/generated": -399.88568115234375, "rewards/margins": 337.8430480957031, "rewards/real": -62.04261779785156, "step": 104 }, { "epoch": 0.22, "grad_norm": 41.505814845821604, "learning_rate": 4.786333227218995e-07, "logits/generated": -1.4750065803527832, "logits/oppo_generated": -2.779034376144409, "logits/oppo_real": -2.8282456398010254, "logits/real": -1.5095953941345215, "logps/generated": -399.50592041015625, "logps/oppo_gen": -69.72903442382812, "logps/oppo_real": -289.5185546875, "logps/real": -296.21661376953125, "loss": 0.5856, "loss/gen": 0.25725850462913513, "loss/real": 0.21779251098632812, "rewards/accuracies": 1.0, "rewards/generated": -329.77685546875, "rewards/margins": 323.07879638671875, "rewards/real": -6.6980767250061035, "step": 105 }, { "epoch": 0.22, "grad_norm": 25.561021981262066, "learning_rate": 4.778883856554003e-07, "logits/generated": -1.1449687480926514, "logits/oppo_generated": -2.7481935024261475, "logits/oppo_real": -2.5869712829589844, "logits/real": -1.6161012649536133, "logps/generated": -371.6748352050781, "logps/oppo_gen": -82.20314025878906, "logps/oppo_real": -304.43182373046875, "logps/real": -322.1702880859375, "loss": 0.5752, "loss/gen": 0.313020795583725, "loss/real": 0.24407562613487244, "rewards/accuracies": 1.0, "rewards/generated": -289.47174072265625, "rewards/margins": 271.7332763671875, "rewards/real": -17.738439559936523, "step": 106 }, { "epoch": 0.22, "grad_norm": 32.99757515061239, "learning_rate": 4.771312844017224e-07, "logits/generated": -1.1853927373886108, "logits/oppo_generated": -2.60406494140625, "logits/oppo_real": -2.690169334411621, "logits/real": -1.1705992221832275, "logps/generated": -584.3870849609375, "logps/oppo_gen": -71.73402404785156, "logps/oppo_real": -317.8191833496094, "logps/real": -361.0145263671875, "loss": 0.6036, "loss/gen": 0.2634323835372925, "loss/real": 0.32717156410217285, "rewards/accuracies": 1.0, "rewards/generated": -512.653076171875, "rewards/margins": 469.45770263671875, "rewards/real": -43.19536209106445, "step": 107 }, { "epoch": 0.23, "grad_norm": 27.058304741220393, "learning_rate": 4.7636205937328664e-07, "logits/generated": -1.094700813293457, "logits/oppo_generated": -2.6770029067993164, "logits/oppo_real": -2.848341703414917, "logits/real": -1.0369551181793213, "logps/generated": -383.38958740234375, "logps/oppo_gen": -69.1045150756836, "logps/oppo_real": -314.2913818359375, "logps/real": -380.5789794921875, "loss": 0.6046, "loss/gen": 0.2522876262664795, "loss/real": 0.494897723197937, "rewards/accuracies": 1.0, "rewards/generated": -314.28509521484375, "rewards/margins": 247.99752807617188, "rewards/real": -66.28755950927734, "step": 108 }, { "epoch": 0.23, "grad_norm": 19.27185799288815, "learning_rate": 4.755807516296547e-07, "logits/generated": -1.0437504053115845, "logits/oppo_generated": -2.708833694458008, "logits/oppo_real": -2.679042339324951, "logits/real": -1.4015766382217407, "logps/generated": -431.29931640625, "logps/oppo_gen": -72.196044921875, "logps/oppo_real": -315.014404296875, "logps/real": -354.2579345703125, "loss": 0.5367, "loss/gen": 0.19119717180728912, "loss/real": 0.36264485120773315, "rewards/accuracies": 0.9375, "rewards/generated": -359.10321044921875, "rewards/margins": 319.8597106933594, "rewards/real": -39.2435302734375, "step": 109 }, { "epoch": 0.23, "grad_norm": 25.759154831189385, "learning_rate": 4.747874028753375e-07, "logits/generated": -0.6104034185409546, "logits/oppo_generated": -2.5937318801879883, "logits/oppo_real": -2.607351303100586, "logits/real": -0.8924816846847534, "logps/generated": -444.0633544921875, "logps/oppo_gen": -83.86407470703125, "logps/oppo_real": -295.3841857910156, "logps/real": -332.865966796875, "loss": 0.5587, "loss/gen": 0.1575571894645691, "loss/real": 0.3404185175895691, "rewards/accuracies": 1.0, "rewards/generated": -360.19927978515625, "rewards/margins": 322.71746826171875, "rewards/real": -37.48179626464844, "step": 110 }, { "epoch": 0.23, "grad_norm": 21.85601087073395, "learning_rate": 4.739820554575686e-07, "logits/generated": -0.9350783228874207, "logits/oppo_generated": -2.8147330284118652, "logits/oppo_real": -2.815829277038574, "logits/real": -1.2611966133117676, "logps/generated": -478.8157653808594, "logps/oppo_gen": -102.38821411132812, "logps/oppo_real": -428.80462646484375, "logps/real": -467.30364990234375, "loss": 0.6008, "loss/gen": 0.1555887907743454, "loss/real": 0.3448715806007385, "rewards/accuracies": 1.0, "rewards/generated": -376.42755126953125, "rewards/margins": 337.92852783203125, "rewards/real": -38.49901580810547, "step": 111 }, { "epoch": 0.23, "grad_norm": 30.296640301759844, "learning_rate": 4.731647523640445e-07, "logits/generated": -0.2944636642932892, "logits/oppo_generated": -2.7116103172302246, "logits/oppo_real": -2.71325421333313, "logits/real": -0.4511444568634033, "logps/generated": -401.2589111328125, "logps/oppo_gen": -63.440895080566406, "logps/oppo_real": -205.63375854492188, "logps/real": -252.83216857910156, "loss": 0.5373, "loss/gen": 0.2936084270477295, "loss/real": 0.3831641674041748, "rewards/accuracies": 0.875, "rewards/generated": -337.8180236816406, "rewards/margins": 290.61962890625, "rewards/real": -47.198387145996094, "step": 112 }, { "epoch": 0.24, "grad_norm": 24.15761310635001, "learning_rate": 4.723355372206297e-07, "logits/generated": -0.03680907189846039, "logits/oppo_generated": -2.817870616912842, "logits/oppo_real": -2.7380404472351074, "logits/real": -0.25676417350769043, "logps/generated": -494.8074951171875, "logps/oppo_gen": -72.92829132080078, "logps/oppo_real": -280.0667724609375, "logps/real": -325.4312744140625, "loss": 0.5344, "loss/gen": 0.15776054561138153, "loss/real": 0.35052889585494995, "rewards/accuracies": 1.0, "rewards/generated": -421.8791809082031, "rewards/margins": 376.51470947265625, "rewards/real": -45.3645133972168, "step": 113 }, { "epoch": 0.24, "grad_norm": 24.451979306227695, "learning_rate": 4.714944542890278e-07, "logits/generated": -0.45144331455230713, "logits/oppo_generated": -2.7973763942718506, "logits/oppo_real": -2.9100446701049805, "logits/real": 0.06648720800876617, "logps/generated": -474.79876708984375, "logps/oppo_gen": -72.39361572265625, "logps/oppo_real": -261.4820556640625, "logps/real": -300.52618408203125, "loss": 0.5192, "loss/gen": 0.2735764980316162, "loss/real": 0.327633798122406, "rewards/accuracies": 1.0, "rewards/generated": -402.40509033203125, "rewards/margins": 363.3609619140625, "rewards/real": -39.04413604736328, "step": 114 }, { "epoch": 0.24, "grad_norm": 28.39276507997625, "learning_rate": 4.706415484644195e-07, "logits/generated": 0.07198745012283325, "logits/oppo_generated": -2.7207179069519043, "logits/oppo_real": -2.880669116973877, "logits/real": 0.16586969792842865, "logps/generated": -494.2124328613281, "logps/oppo_gen": -71.45054626464844, "logps/oppo_real": -325.3066711425781, "logps/real": -359.72027587890625, "loss": 0.5301, "loss/gen": 0.1617964804172516, "loss/real": 0.3581461012363434, "rewards/accuracies": 1.0, "rewards/generated": -422.7618713378906, "rewards/margins": 388.34832763671875, "rewards/real": -34.413551330566406, "step": 115 }, { "epoch": 0.24, "grad_norm": 34.45851538047689, "learning_rate": 4.6977686527306555e-07, "logits/generated": 0.7090212106704712, "logits/oppo_generated": -2.6319010257720947, "logits/oppo_real": -2.643490791320801, "logits/real": 0.8649207949638367, "logps/generated": -447.24755859375, "logps/oppo_gen": -73.28824615478516, "logps/oppo_real": -283.16064453125, "logps/real": -304.78131103515625, "loss": 0.5395, "loss/gen": 0.17251688241958618, "loss/real": 0.28544631600379944, "rewards/accuracies": 1.0, "rewards/generated": -373.9593505859375, "rewards/margins": 352.3387145996094, "rewards/real": -21.620647430419922, "step": 116 }, { "epoch": 0.24, "grad_norm": 43.58818988959696, "learning_rate": 4.6890045086987707e-07, "logits/generated": 1.441139578819275, "logits/oppo_generated": -2.66965389251709, "logits/oppo_real": -2.664613962173462, "logits/real": 1.5334069728851318, "logps/generated": -609.1144409179688, "logps/oppo_gen": -70.15062713623047, "logps/oppo_real": -289.0845947265625, "logps/real": -356.73895263671875, "loss": 0.6133, "loss/gen": 0.12631765007972717, "loss/real": 0.5410544872283936, "rewards/accuracies": 1.0, "rewards/generated": -538.9638061523438, "rewards/margins": 471.3094177246094, "rewards/real": -67.65434265136719, "step": 117 }, { "epoch": 0.25, "grad_norm": 43.39746496997621, "learning_rate": 4.680123520359519e-07, "logits/generated": 1.1541767120361328, "logits/oppo_generated": -2.7411558628082275, "logits/oppo_real": -2.8276548385620117, "logits/real": 1.5257081985473633, "logps/generated": -463.33917236328125, "logps/oppo_gen": -79.05010986328125, "logps/oppo_real": -260.1556396484375, "logps/real": -341.10089111328125, "loss": 0.5586, "loss/gen": 0.11574704945087433, "loss/real": 0.5906919240951538, "rewards/accuracies": 1.0, "rewards/generated": -384.2890625, "rewards/margins": 303.34381103515625, "rewards/real": -80.94526672363281, "step": 118 }, { "epoch": 0.25, "grad_norm": 41.35345986224279, "learning_rate": 4.671126161760772e-07, "logits/generated": 0.8925095796585083, "logits/oppo_generated": -2.8381776809692383, "logits/oppo_real": -2.895987033843994, "logits/real": 0.3291308879852295, "logps/generated": -404.5374755859375, "logps/oppo_gen": -75.62970733642578, "logps/oppo_real": -341.94390869140625, "logps/real": -355.20977783203125, "loss": 0.5461, "loss/gen": 0.30142074823379517, "loss/real": 0.24460729956626892, "rewards/accuracies": 1.0, "rewards/generated": -328.90777587890625, "rewards/margins": 315.6418762207031, "rewards/real": -13.265907287597656, "step": 119 }, { "epoch": 0.25, "grad_norm": 49.230028469674885, "learning_rate": 4.662012913161997e-07, "logits/generated": 1.0944744348526, "logits/oppo_generated": -2.8429150581359863, "logits/oppo_real": -2.720982313156128, "logits/real": -0.4009183645248413, "logps/generated": -391.66265869140625, "logps/oppo_gen": -83.18344116210938, "logps/oppo_real": -297.22320556640625, "logps/real": -295.4556884765625, "loss": 0.5535, "loss/gen": 0.34921205043792725, "loss/real": 0.20909735560417175, "rewards/accuracies": 1.0, "rewards/generated": -308.47918701171875, "rewards/margins": 310.2467041015625, "rewards/real": 1.7674973011016846, "step": 120 }, { "epoch": 0.25, "grad_norm": 42.11695635602497, "learning_rate": 4.6527842610086124e-07, "logits/generated": 2.4489541053771973, "logits/oppo_generated": -2.769073009490967, "logits/oppo_real": -2.7379775047302246, "logits/real": 2.0011355876922607, "logps/generated": -365.7623291015625, "logps/oppo_gen": -63.66206359863281, "logps/oppo_real": -185.47964477539062, "logps/real": -223.77847290039062, "loss": 0.5198, "loss/gen": 0.2616707682609558, "loss/real": 0.3866133689880371, "rewards/accuracies": 0.875, "rewards/generated": -302.1002502441406, "rewards/margins": 263.8013916015625, "rewards/real": -38.298851013183594, "step": 121 }, { "epoch": 0.26, "grad_norm": 80.59431041391042, "learning_rate": 4.6434406979060327e-07, "logits/generated": 1.4197285175323486, "logits/oppo_generated": -2.808683395385742, "logits/oppo_real": -2.830435276031494, "logits/real": 1.4447864294052124, "logps/generated": -570.8482666015625, "logps/oppo_gen": -82.84730529785156, "logps/oppo_real": -266.9815979003906, "logps/real": -341.551025390625, "loss": 0.5194, "loss/gen": 0.08523067831993103, "loss/real": 0.5628464221954346, "rewards/accuracies": 1.0, "rewards/generated": -488.0009765625, "rewards/margins": 413.4315490722656, "rewards/real": -74.56942749023438, "step": 122 }, { "epoch": 0.26, "grad_norm": 30.311157951927846, "learning_rate": 4.6339827225933657e-07, "logits/generated": 2.5612378120422363, "logits/oppo_generated": -2.7827868461608887, "logits/oppo_real": -2.7702150344848633, "logits/real": 0.973285436630249, "logps/generated": -453.61419677734375, "logps/oppo_gen": -78.0826416015625, "logps/oppo_real": -209.60366821289062, "logps/real": -243.14308166503906, "loss": 0.4662, "loss/gen": 0.17920607328414917, "loss/real": 0.3000218868255615, "rewards/accuracies": 0.9375, "rewards/generated": -375.53155517578125, "rewards/margins": 341.99212646484375, "rewards/real": -33.53944778442383, "step": 123 }, { "epoch": 0.26, "grad_norm": 31.34001878339817, "learning_rate": 4.6244108399167977e-07, "logits/generated": 0.48759734630584717, "logits/oppo_generated": -2.81278133392334, "logits/oppo_real": -3.0445303916931152, "logits/real": 0.41890883445739746, "logps/generated": -512.637939453125, "logps/oppo_gen": -97.97140502929688, "logps/oppo_real": -318.54241943359375, "logps/real": -327.8448791503906, "loss": 0.4723, "loss/gen": 0.11405383050441742, "loss/real": 0.24334505200386047, "rewards/accuracies": 1.0, "rewards/generated": -414.66650390625, "rewards/margins": 405.3641052246094, "rewards/real": -9.302425384521484, "step": 124 }, { "epoch": 0.26, "grad_norm": 43.36779760537333, "learning_rate": 4.614725560802639e-07, "logits/generated": 2.383960485458374, "logits/oppo_generated": -2.5410995483398438, "logits/oppo_real": -2.6250815391540527, "logits/real": 1.8639309406280518, "logps/generated": -426.3900146484375, "logps/oppo_gen": -85.07911682128906, "logps/oppo_real": -302.9610595703125, "logps/real": -330.28704833984375, "loss": 0.4588, "loss/gen": 0.2807266116142273, "loss/real": 0.2807849943637848, "rewards/accuracies": 1.0, "rewards/generated": -341.3109130859375, "rewards/margins": 313.98492431640625, "rewards/real": -27.325986862182617, "step": 125 }, { "epoch": 0.26, "grad_norm": 22.65675107204528, "learning_rate": 4.60492740223006e-07, "logits/generated": 3.7563798427581787, "logits/oppo_generated": -2.6942880153656006, "logits/oppo_real": -2.620532989501953, "logits/real": 1.8744386434555054, "logps/generated": -482.1658020019531, "logps/oppo_gen": -80.65577697753906, "logps/oppo_real": -334.33636474609375, "logps/real": -340.36029052734375, "loss": 0.4313, "loss/gen": 0.1339287906885147, "loss/real": 0.25005483627319336, "rewards/accuracies": 1.0, "rewards/generated": -401.510009765625, "rewards/margins": 395.486083984375, "rewards/real": -6.023906707763672, "step": 126 }, { "epoch": 0.27, "grad_norm": 40.99230289315758, "learning_rate": 4.595016887203488e-07, "logits/generated": 4.8035993576049805, "logits/oppo_generated": -2.6506311893463135, "logits/oppo_real": -2.5980076789855957, "logits/real": 3.132660388946533, "logps/generated": -512.6973876953125, "logps/oppo_gen": -71.25318908691406, "logps/oppo_real": -271.4465026855469, "logps/real": -341.7394714355469, "loss": 0.4682, "loss/gen": 0.09215886890888214, "loss/real": 0.5298959016799927, "rewards/accuracies": 0.9375, "rewards/generated": -441.4441833496094, "rewards/margins": 371.1512451171875, "rewards/real": -70.29296875, "step": 127 }, { "epoch": 0.27, "grad_norm": 56.79854708680386, "learning_rate": 4.584994544724695e-07, "logits/generated": 3.480712890625, "logits/oppo_generated": -2.7567696571350098, "logits/oppo_real": -2.71878981590271, "logits/real": 2.234100103378296, "logps/generated": -505.15631103515625, "logps/oppo_gen": -76.11280059814453, "logps/oppo_real": -274.9986267089844, "logps/real": -316.529541015625, "loss": 0.5428, "loss/gen": 0.06698533147573471, "loss/real": 0.37679505348205566, "rewards/accuracies": 1.0, "rewards/generated": -429.04351806640625, "rewards/margins": 387.5125732421875, "rewards/real": -41.530914306640625, "step": 128 }, { "epoch": 0.27, "grad_norm": 34.5051442609468, "learning_rate": 4.574860909764559e-07, "logits/generated": 3.7409512996673584, "logits/oppo_generated": -2.7801530361175537, "logits/oppo_real": -2.662064790725708, "logits/real": 1.398315191268921, "logps/generated": -539.3264770507812, "logps/oppo_gen": -81.54147338867188, "logps/oppo_real": -353.9856262207031, "logps/real": -355.10321044921875, "loss": 0.3566, "loss/gen": 0.20957569777965546, "loss/real": 0.2095394879579544, "rewards/accuracies": 1.0, "rewards/generated": -457.7850341796875, "rewards/margins": 456.66748046875, "rewards/real": -1.117553472518921, "step": 129 }, { "epoch": 0.27, "grad_norm": 58.581181985450755, "learning_rate": 4.5646165232345103e-07, "logits/generated": 3.001497983932495, "logits/oppo_generated": -2.7938759326934814, "logits/oppo_real": -2.797100067138672, "logits/real": -0.1904727816581726, "logps/generated": -476.5736083984375, "logps/oppo_gen": -85.75858306884766, "logps/oppo_real": -330.778076171875, "logps/real": -355.42279052734375, "loss": 0.4177, "loss/gen": 0.21888455748558044, "loss/real": 0.3061525225639343, "rewards/accuracies": 1.0, "rewards/generated": -390.8150329589844, "rewards/margins": 366.17034912109375, "rewards/real": -24.644712448120117, "step": 130 }, { "epoch": 0.27, "grad_norm": 76.23830596803028, "learning_rate": 4.554261931957657e-07, "logits/generated": 1.2798744440078735, "logits/oppo_generated": -2.702157497406006, "logits/oppo_real": -2.785520553588867, "logits/real": 0.2064148187637329, "logps/generated": -550.7872314453125, "logps/oppo_gen": -101.4595947265625, "logps/oppo_real": -297.305419921875, "logps/real": -281.96148681640625, "loss": 0.4163, "loss/gen": 0.17837375402450562, "loss/real": 0.21107184886932373, "rewards/accuracies": 1.0, "rewards/generated": -449.3276062011719, "rewards/margins": 464.67156982421875, "rewards/real": 15.343944549560547, "step": 131 }, { "epoch": 0.28, "grad_norm": 42.78716460666826, "learning_rate": 4.5437976886395955e-07, "logits/generated": 3.7917985916137695, "logits/oppo_generated": -2.3708133697509766, "logits/oppo_real": -2.418696641921997, "logits/real": 2.052703380584717, "logps/generated": -596.48828125, "logps/oppo_gen": -74.7413558959961, "logps/oppo_real": -314.3866882324219, "logps/real": -309.42364501953125, "loss": 0.3384, "loss/gen": 0.10826118290424347, "loss/real": 0.19940531253814697, "rewards/accuracies": 1.0, "rewards/generated": -521.7468872070312, "rewards/margins": 526.7098999023438, "rewards/real": 4.963039875030518, "step": 132 }, { "epoch": 0.28, "grad_norm": 35.28753611515155, "learning_rate": 4.5332243518389136e-07, "logits/generated": 3.8041744232177734, "logits/oppo_generated": -2.743939161300659, "logits/oppo_real": -2.73405122756958, "logits/real": 1.2489296197891235, "logps/generated": -574.4937133789062, "logps/oppo_gen": -79.94535827636719, "logps/oppo_real": -272.021240234375, "logps/real": -293.32586669921875, "loss": 0.4572, "loss/gen": 0.04564107209444046, "loss/real": 0.34681591391563416, "rewards/accuracies": 1.0, "rewards/generated": -494.54833984375, "rewards/margins": 473.2436828613281, "rewards/real": -21.304649353027344, "step": 133 }, { "epoch": 0.28, "grad_norm": 66.3057437946084, "learning_rate": 4.5225424859373684e-07, "logits/generated": 3.78678560256958, "logits/oppo_generated": -2.7007479667663574, "logits/oppo_real": -2.753429412841797, "logits/real": 0.6452586650848389, "logps/generated": -532.5565185546875, "logps/oppo_gen": -77.23085021972656, "logps/oppo_real": -266.35296630859375, "logps/real": -287.3046875, "loss": 0.4927, "loss/gen": 0.0773511677980423, "loss/real": 0.2800845503807068, "rewards/accuracies": 1.0, "rewards/generated": -455.32568359375, "rewards/margins": 434.3739929199219, "rewards/real": -20.95172691345215, "step": 134 }, { "epoch": 0.28, "grad_norm": 83.11441133217362, "learning_rate": 4.511752661109768e-07, "logits/generated": 2.705482244491577, "logits/oppo_generated": -2.691554546356201, "logits/oppo_real": -2.8548970222473145, "logits/real": 1.9097120761871338, "logps/generated": -569.287841796875, "logps/oppo_gen": -83.92848205566406, "logps/oppo_real": -314.796875, "logps/real": -374.4678039550781, "loss": 0.517, "loss/gen": 0.06381608545780182, "loss/real": 0.5655554533004761, "rewards/accuracies": 0.875, "rewards/generated": -485.3593444824219, "rewards/margins": 425.6883850097656, "rewards/real": -59.67094421386719, "step": 135 }, { "epoch": 0.28, "grad_norm": 33.9451048253141, "learning_rate": 4.5008554532935316e-07, "logits/generated": 1.3386224508285522, "logits/oppo_generated": -2.7305233478546143, "logits/oppo_real": -2.919029474258423, "logits/real": 0.10530074685811996, "logps/generated": -542.9541625976562, "logps/oppo_gen": -73.71615600585938, "logps/oppo_real": -288.34320068359375, "logps/real": -307.84796142578125, "loss": 0.4413, "loss/gen": 0.0951322540640831, "loss/real": 0.34142056107521057, "rewards/accuracies": 0.9375, "rewards/generated": -469.238037109375, "rewards/margins": 449.7333068847656, "rewards/real": -19.504728317260742, "step": 136 }, { "epoch": 0.29, "grad_norm": 39.38732446009757, "learning_rate": 4.4898514441579493e-07, "logits/generated": 1.0103791952133179, "logits/oppo_generated": -2.7485411167144775, "logits/oppo_real": -2.8734302520751953, "logits/real": -0.42922842502593994, "logps/generated": -525.3568115234375, "logps/oppo_gen": -87.78644561767578, "logps/oppo_real": -241.0199432373047, "logps/real": -247.32534790039062, "loss": 0.444, "loss/gen": 0.13491667807102203, "loss/real": 0.21978439390659332, "rewards/accuracies": 1.0, "rewards/generated": -437.5703125, "rewards/margins": 431.26495361328125, "rewards/real": -6.305412292480469, "step": 137 }, { "epoch": 0.29, "grad_norm": 47.51406812641783, "learning_rate": 4.478741221073135e-07, "logits/generated": 0.6537960767745972, "logits/oppo_generated": -2.693493127822876, "logits/oppo_real": -2.865375280380249, "logits/real": -0.2775810658931732, "logps/generated": -534.7683715820312, "logps/oppo_gen": -103.05206298828125, "logps/oppo_real": -489.7313232421875, "logps/real": -491.6168212890625, "loss": 0.4552, "loss/gen": 0.13188748061656952, "loss/real": 0.23172855377197266, "rewards/accuracies": 0.9375, "rewards/generated": -431.71630859375, "rewards/margins": 429.8308410644531, "rewards/real": -1.8854761123657227, "step": 138 }, { "epoch": 0.29, "grad_norm": 53.06036049131922, "learning_rate": 4.467525377078671e-07, "logits/generated": 1.995945930480957, "logits/oppo_generated": -2.8657331466674805, "logits/oppo_real": -2.861227512359619, "logits/real": 0.10061898082494736, "logps/generated": -467.5784912109375, "logps/oppo_gen": -75.67571258544922, "logps/oppo_real": -287.5257568359375, "logps/real": -309.69976806640625, "loss": 0.4271, "loss/gen": 0.1389007419347763, "loss/real": 0.3501040041446686, "rewards/accuracies": 0.875, "rewards/generated": -391.90277099609375, "rewards/margins": 369.728759765625, "rewards/real": -22.17399787902832, "step": 139 }, { "epoch": 0.29, "grad_norm": 32.51703279629001, "learning_rate": 4.456204510851956e-07, "logits/generated": 3.4422943592071533, "logits/oppo_generated": -2.8228468894958496, "logits/oppo_real": -2.7876362800598145, "logits/real": -0.5376209020614624, "logps/generated": -495.888671875, "logps/oppo_gen": -74.50221252441406, "logps/oppo_real": -289.463623046875, "logps/real": -322.7392578125, "loss": 0.5096, "loss/gen": 0.12224815785884857, "loss/real": 0.4228206276893616, "rewards/accuracies": 0.875, "rewards/generated": -421.38641357421875, "rewards/margins": 388.11077880859375, "rewards/real": -33.275638580322266, "step": 140 }, { "epoch": 0.29, "grad_norm": 42.809336390868786, "learning_rate": 4.444779226676246e-07, "logits/generated": 3.224811315536499, "logits/oppo_generated": -2.8969969749450684, "logits/oppo_real": -2.8022689819335938, "logits/real": -0.3930954337120056, "logps/generated": -569.3974609375, "logps/oppo_gen": -73.09552764892578, "logps/oppo_real": -194.8427734375, "logps/real": -206.7709197998047, "loss": 0.4614, "loss/gen": 0.05774524062871933, "loss/real": 0.2606027126312256, "rewards/accuracies": 1.0, "rewards/generated": -496.3019714355469, "rewards/margins": 484.373779296875, "rewards/real": -11.928150177001953, "step": 141 }, { "epoch": 0.3, "grad_norm": 43.46620005101665, "learning_rate": 4.4332501344084005e-07, "logits/generated": 4.730867385864258, "logits/oppo_generated": -2.628166675567627, "logits/oppo_real": -2.599769353866577, "logits/real": -0.41064295172691345, "logps/generated": -721.416748046875, "logps/oppo_gen": -60.03528594970703, "logps/oppo_real": -204.53244018554688, "logps/real": -209.001220703125, "loss": 0.4182, "loss/gen": 0.04421373829245567, "loss/real": 0.2350148856639862, "rewards/accuracies": 1.0, "rewards/generated": -661.3814086914062, "rewards/margins": 656.91259765625, "rewards/real": -4.468780994415283, "step": 142 }, { "epoch": 0.3, "grad_norm": 95.168892838272, "learning_rate": 4.4216178494463295e-07, "logits/generated": 3.0664350986480713, "logits/oppo_generated": -2.8121323585510254, "logits/oppo_real": -2.735476493835449, "logits/real": 0.5808520317077637, "logps/generated": -730.3409423828125, "logps/oppo_gen": -83.52450561523438, "logps/oppo_real": -399.65948486328125, "logps/real": -436.56512451171875, "loss": 0.4257, "loss/gen": 0.08173231780529022, "loss/real": 0.3919621706008911, "rewards/accuracies": 1.0, "rewards/generated": -646.81640625, "rewards/margins": 609.910888671875, "rewards/real": -36.90563201904297, "step": 143 }, { "epoch": 0.3, "grad_norm": 33.59023782512469, "learning_rate": 4.4098829926961477e-07, "logits/generated": 2.08347225189209, "logits/oppo_generated": -2.6980109214782715, "logits/oppo_real": -2.983388900756836, "logits/real": 2.226280450820923, "logps/generated": -538.6220703125, "logps/oppo_gen": -78.72826385498047, "logps/oppo_real": -206.14263916015625, "logps/real": -280.3409423828125, "loss": 0.442, "loss/gen": 0.16410240530967712, "loss/real": 0.6128249764442444, "rewards/accuracies": 1.0, "rewards/generated": -459.893798828125, "rewards/margins": 385.6955261230469, "rewards/real": -74.19828796386719, "step": 144 }, { "epoch": 0.3, "grad_norm": 88.10951215852066, "learning_rate": 4.398046190539024e-07, "logits/generated": 0.7178199291229248, "logits/oppo_generated": -2.756943702697754, "logits/oppo_real": -2.801675796508789, "logits/real": -1.1681644916534424, "logps/generated": -528.5025634765625, "logps/oppo_gen": -86.45504760742188, "logps/oppo_real": -309.9644775390625, "logps/real": -289.3895568847656, "loss": 0.4441, "loss/gen": 0.22567950189113617, "loss/real": 0.1723930984735489, "rewards/accuracies": 1.0, "rewards/generated": -442.04754638671875, "rewards/margins": 462.6224365234375, "rewards/real": 20.574920654296875, "step": 145 }, { "epoch": 0.31, "grad_norm": 96.93573197130407, "learning_rate": 4.3861080747977566e-07, "logits/generated": 2.6402652263641357, "logits/oppo_generated": -2.6960177421569824, "logits/oppo_real": -2.7647712230682373, "logits/real": 0.7887452840805054, "logps/generated": -672.70458984375, "logps/oppo_gen": -86.10973358154297, "logps/oppo_real": -239.423583984375, "logps/real": -226.11953735351562, "loss": 0.4131, "loss/gen": 0.06325114518404007, "loss/real": 0.2073189914226532, "rewards/accuracies": 1.0, "rewards/generated": -586.5948486328125, "rewards/margins": 599.8988647460938, "rewards/real": 13.304061889648438, "step": 146 }, { "epoch": 0.31, "grad_norm": 45.219169778200126, "learning_rate": 4.37406928270304e-07, "logits/generated": 1.6589947938919067, "logits/oppo_generated": -2.884913444519043, "logits/oppo_real": -3.0370020866394043, "logits/real": 1.7600839138031006, "logps/generated": -586.7601318359375, "logps/oppo_gen": -84.35366821289062, "logps/oppo_real": -321.58514404296875, "logps/real": -338.66668701171875, "loss": 0.4066, "loss/gen": 0.061856117099523544, "loss/real": 0.3464030623435974, "rewards/accuracies": 1.0, "rewards/generated": -502.40643310546875, "rewards/margins": 485.32489013671875, "rewards/real": -17.08155059814453, "step": 147 }, { "epoch": 0.31, "grad_norm": 63.70267659886337, "learning_rate": 4.3619304568594546e-07, "logits/generated": 4.678864479064941, "logits/oppo_generated": -2.751469135284424, "logits/oppo_real": -2.747343063354492, "logits/real": 2.1347851753234863, "logps/generated": -602.4761962890625, "logps/oppo_gen": -61.375526428222656, "logps/oppo_real": -252.73489379882812, "logps/real": -278.0063781738281, "loss": 0.4028, "loss/gen": 0.09595850110054016, "loss/real": 0.30543041229248047, "rewards/accuracies": 1.0, "rewards/generated": -541.1007080078125, "rewards/margins": 515.8291625976562, "rewards/real": -25.27145767211914, "step": 148 }, { "epoch": 0.31, "grad_norm": 23.65093958724209, "learning_rate": 4.349692245211165e-07, "logits/generated": 4.383773326873779, "logits/oppo_generated": -2.817481517791748, "logits/oppo_real": -2.831568717956543, "logits/real": 0.075783371925354, "logps/generated": -662.4940185546875, "logps/oppo_gen": -105.8621597290039, "logps/oppo_real": -327.9042663574219, "logps/real": -348.6964111328125, "loss": 0.3471, "loss/gen": 0.0598088875412941, "loss/real": 0.3484806418418884, "rewards/accuracies": 0.9375, "rewards/generated": -556.6318359375, "rewards/margins": 535.8397216796875, "rewards/real": -20.792137145996094, "step": 149 }, { "epoch": 0.31, "grad_norm": 44.175200783446506, "learning_rate": 4.337355301007335e-07, "logits/generated": 2.0324692726135254, "logits/oppo_generated": -2.709242820739746, "logits/oppo_real": -2.915376663208008, "logits/real": 0.17188304662704468, "logps/generated": -653.400390625, "logps/oppo_gen": -87.3215103149414, "logps/oppo_real": -375.57025146484375, "logps/real": -374.0672607421875, "loss": 0.4053, "loss/gen": 0.05262891948223114, "loss/real": 0.2269752323627472, "rewards/accuracies": 1.0, "rewards/generated": -566.0789184570312, "rewards/margins": 567.5818481445312, "rewards/real": 1.5029735565185547, "step": 150 }, { "epoch": 0.32, "grad_norm": 43.954390034674724, "learning_rate": 4.324920282767256e-07, "logits/generated": 2.20145320892334, "logits/oppo_generated": -2.964322328567505, "logits/oppo_real": -2.982285499572754, "logits/real": -1.8403794765472412, "logps/generated": -593.6246948242188, "logps/oppo_gen": -96.36965942382812, "logps/oppo_real": -400.2677001953125, "logps/real": -381.9501953125, "loss": 0.3817, "loss/gen": 0.05244412645697594, "loss/real": 0.17407363653182983, "rewards/accuracies": 1.0, "rewards/generated": -497.2550048828125, "rewards/margins": 515.5724487304688, "rewards/real": 18.3174991607666, "step": 151 }, { "epoch": 0.32, "grad_norm": 24.56001373986472, "learning_rate": 4.312387854245201e-07, "logits/generated": 2.274653911590576, "logits/oppo_generated": -2.6832199096679688, "logits/oppo_real": -2.6996188163757324, "logits/real": 0.23763501644134521, "logps/generated": -595.6156005859375, "logps/oppo_gen": -99.48007202148438, "logps/oppo_real": -341.5190734863281, "logps/real": -335.8973693847656, "loss": 0.4202, "loss/gen": 0.08892205357551575, "loss/real": 0.24602922797203064, "rewards/accuracies": 1.0, "rewards/generated": -496.13555908203125, "rewards/margins": 501.75726318359375, "rewards/real": 5.621710777282715, "step": 152 }, { "epoch": 0.32, "grad_norm": 88.40290290588595, "learning_rate": 4.2997586843949896e-07, "logits/generated": 2.223160982131958, "logits/oppo_generated": -2.5872344970703125, "logits/oppo_real": -2.4579038619995117, "logits/real": -1.5002179145812988, "logps/generated": -527.0772705078125, "logps/oppo_gen": -70.17259216308594, "logps/oppo_real": -416.83404541015625, "logps/real": -401.7785949707031, "loss": 0.4494, "loss/gen": 0.11349719762802124, "loss/real": 0.18198516964912415, "rewards/accuracies": 1.0, "rewards/generated": -456.90472412109375, "rewards/margins": 471.960205078125, "rewards/real": 15.055469512939453, "step": 153 }, { "epoch": 0.32, "grad_norm": 88.65677597217964, "learning_rate": 4.287033447334286e-07, "logits/generated": 2.3303475379943848, "logits/oppo_generated": -2.7479500770568848, "logits/oppo_real": -2.8972339630126953, "logits/real": -0.40632662177085876, "logps/generated": -480.84222412109375, "logps/oppo_gen": -80.25750732421875, "logps/oppo_real": -294.4725646972656, "logps/real": -291.6681823730469, "loss": 0.5496, "loss/gen": 0.2343224287033081, "loss/real": 0.23040422797203064, "rewards/accuracies": 1.0, "rewards/generated": -400.584716796875, "rewards/margins": 403.38909912109375, "rewards/real": 2.804377555847168, "step": 154 }, { "epoch": 0.32, "grad_norm": 32.896789060161325, "learning_rate": 4.2742128223086115e-07, "logits/generated": 0.9537273645401001, "logits/oppo_generated": -2.8202219009399414, "logits/oppo_real": -3.0027949810028076, "logits/real": -0.3538452982902527, "logps/generated": -534.1832275390625, "logps/oppo_gen": -73.1172103881836, "logps/oppo_real": -376.35302734375, "logps/real": -374.4771728515625, "loss": 0.3783, "loss/gen": 0.09071531891822815, "loss/real": 0.2262829840183258, "rewards/accuracies": 1.0, "rewards/generated": -461.0660400390625, "rewards/margins": 462.94183349609375, "rewards/real": 1.8758478164672852, "step": 155 }, { "epoch": 0.33, "grad_norm": 31.70462085246309, "learning_rate": 4.261297493655092e-07, "logits/generated": 3.504033088684082, "logits/oppo_generated": -2.5366318225860596, "logits/oppo_real": -2.6091504096984863, "logits/real": 0.27710264921188354, "logps/generated": -533.432373046875, "logps/oppo_gen": -70.62061309814453, "logps/oppo_real": -308.5993957519531, "logps/real": -312.66424560546875, "loss": 0.395, "loss/gen": 0.09391500055789948, "loss/real": 0.3338760435581207, "rewards/accuracies": 0.9375, "rewards/generated": -462.81170654296875, "rewards/margins": 458.74688720703125, "rewards/real": -4.0648345947265625, "step": 156 }, { "epoch": 0.33, "grad_norm": 55.69409770820648, "learning_rate": 4.2482881507659244e-07, "logits/generated": 5.257065773010254, "logits/oppo_generated": -2.839346408843994, "logits/oppo_real": -2.589020013809204, "logits/real": -0.5401603579521179, "logps/generated": -675.083984375, "logps/oppo_gen": -81.69267272949219, "logps/oppo_real": -345.9958190917969, "logps/real": -360.93621826171875, "loss": 0.4078, "loss/gen": 0.04161800444126129, "loss/real": 0.3369942605495453, "rewards/accuracies": 1.0, "rewards/generated": -593.391357421875, "rewards/margins": 578.450927734375, "rewards/real": -14.940374374389648, "step": 157 }, { "epoch": 0.33, "grad_norm": 69.88690640957003, "learning_rate": 4.235185488051585e-07, "logits/generated": 5.414549827575684, "logits/oppo_generated": -2.6783924102783203, "logits/oppo_real": -2.712614059448242, "logits/real": 1.8532522916793823, "logps/generated": -568.1709594726562, "logps/oppo_gen": -56.096778869628906, "logps/oppo_real": -230.0592498779297, "logps/real": -244.01300048828125, "loss": 0.3708, "loss/gen": 0.04885585233569145, "loss/real": 0.2971190810203552, "rewards/accuracies": 1.0, "rewards/generated": -512.0741577148438, "rewards/margins": 498.12042236328125, "rewards/real": -13.953733444213867, "step": 158 }, { "epoch": 0.33, "grad_norm": 33.58151666793431, "learning_rate": 4.2219902049037554e-07, "logits/generated": 4.000687122344971, "logits/oppo_generated": -2.7408018112182617, "logits/oppo_real": -2.6598281860351562, "logits/real": 0.4296528697013855, "logps/generated": -535.0169677734375, "logps/oppo_gen": -72.39141845703125, "logps/oppo_real": -357.7438659667969, "logps/real": -398.12890625, "loss": 0.3407, "loss/gen": 0.07793214172124863, "loss/real": 0.4461689591407776, "rewards/accuracies": 1.0, "rewards/generated": -462.62554931640625, "rewards/margins": 422.2405090332031, "rewards/real": -40.385040283203125, "step": 159 }, { "epoch": 0.33, "grad_norm": 49.08348725083748, "learning_rate": 4.2087030056579986e-07, "logits/generated": 1.8264459371566772, "logits/oppo_generated": -2.8000125885009766, "logits/oppo_real": -2.9204776287078857, "logits/real": 0.3307687044143677, "logps/generated": -575.8387451171875, "logps/oppo_gen": -87.82046508789062, "logps/oppo_real": -341.7147216796875, "logps/real": -382.95611572265625, "loss": 0.4618, "loss/gen": 0.062293656170368195, "loss/real": 0.4385297894477844, "rewards/accuracies": 1.0, "rewards/generated": -488.0182189941406, "rewards/margins": 446.7768249511719, "rewards/real": -41.24138641357422, "step": 160 }, { "epoch": 0.34, "grad_norm": 41.03429485798662, "learning_rate": 4.1953245995561577e-07, "logits/generated": 4.770796298980713, "logits/oppo_generated": -2.6554837226867676, "logits/oppo_real": -2.6420817375183105, "logits/real": -0.8062165975570679, "logps/generated": -464.02740478515625, "logps/oppo_gen": -63.17603302001953, "logps/oppo_real": -296.563720703125, "logps/real": -275.2172546386719, "loss": 0.3776, "loss/gen": 0.20890629291534424, "loss/real": 0.20531198382377625, "rewards/accuracies": 1.0, "rewards/generated": -400.85137939453125, "rewards/margins": 422.1978759765625, "rewards/real": 21.34646224975586, "step": 161 }, { "epoch": 0.34, "grad_norm": 54.28695053184398, "learning_rate": 4.1818557007085e-07, "logits/generated": 1.0603010654449463, "logits/oppo_generated": -2.7772350311279297, "logits/oppo_real": -2.840733528137207, "logits/real": -0.4071798622608185, "logps/generated": -500.5398254394531, "logps/oppo_gen": -87.80577087402344, "logps/oppo_real": -293.36444091796875, "logps/real": -301.470703125, "loss": 0.4106, "loss/gen": 0.1799190789461136, "loss/real": 0.3776960074901581, "rewards/accuracies": 1.0, "rewards/generated": -412.73406982421875, "rewards/margins": 404.6278076171875, "rewards/real": -8.10627555847168, "step": 162 }, { "epoch": 0.34, "grad_norm": 28.239338981521527, "learning_rate": 4.1682970280555987e-07, "logits/generated": 0.7941306233406067, "logits/oppo_generated": -2.940112590789795, "logits/oppo_real": -3.094730854034424, "logits/real": -0.9150586128234863, "logps/generated": -491.39971923828125, "logps/oppo_gen": -76.87165832519531, "logps/oppo_real": -265.2705383300781, "logps/real": -275.28662109375, "loss": 0.4083, "loss/gen": 0.19433584809303284, "loss/real": 0.27786490321159363, "rewards/accuracies": 1.0, "rewards/generated": -414.528076171875, "rewards/margins": 404.51202392578125, "rewards/real": -10.016090393066406, "step": 163 }, { "epoch": 0.34, "grad_norm": 39.62413279068783, "learning_rate": 4.154649305329958e-07, "logits/generated": 1.9323348999023438, "logits/oppo_generated": -2.86384916305542, "logits/oppo_real": -2.868351697921753, "logits/real": -0.1303810477256775, "logps/generated": -557.84228515625, "logps/oppo_gen": -69.69680786132812, "logps/oppo_real": -212.30072021484375, "logps/real": -245.2010498046875, "loss": 0.501, "loss/gen": 0.10067278146743774, "loss/real": 0.3780396580696106, "rewards/accuracies": 1.0, "rewards/generated": -488.1455078125, "rewards/margins": 455.24517822265625, "rewards/real": -32.900325775146484, "step": 164 }, { "epoch": 0.35, "grad_norm": 32.57133659789597, "learning_rate": 4.140913261017382e-07, "logits/generated": 4.161721229553223, "logits/oppo_generated": -2.869417190551758, "logits/oppo_real": -2.839968204498291, "logits/real": -0.5776036977767944, "logps/generated": -576.4458618164062, "logps/oppo_gen": -88.48406982421875, "logps/oppo_real": -363.1581726074219, "logps/real": -361.1283874511719, "loss": 0.3818, "loss/gen": 0.06913182139396667, "loss/real": 0.25034162402153015, "rewards/accuracies": 1.0, "rewards/generated": -487.9617919921875, "rewards/margins": 489.9915771484375, "rewards/real": 2.029754161834717, "step": 165 }, { "epoch": 0.35, "grad_norm": 58.546320387018135, "learning_rate": 4.127089628318089e-07, "logits/generated": 3.4975500106811523, "logits/oppo_generated": -2.6603808403015137, "logits/oppo_real": -2.783158302307129, "logits/real": -0.20328007638454437, "logps/generated": -624.7900390625, "logps/oppo_gen": -71.49801635742188, "logps/oppo_real": -266.64727783203125, "logps/real": -276.4420166015625, "loss": 0.4823, "loss/gen": 0.14242783188819885, "loss/real": 0.27045345306396484, "rewards/accuracies": 1.0, "rewards/generated": -553.2919921875, "rewards/margins": 543.497314453125, "rewards/real": -9.794729232788086, "step": 166 }, { "epoch": 0.35, "grad_norm": 32.59188857939771, "learning_rate": 4.113179145107575e-07, "logits/generated": 3.191453456878662, "logits/oppo_generated": -2.6542935371398926, "logits/oppo_real": -2.6339142322540283, "logits/real": -0.12376086413860321, "logps/generated": -537.1490478515625, "logps/oppo_gen": -78.53817749023438, "logps/oppo_real": -259.07733154296875, "logps/real": -275.25604248046875, "loss": 0.374, "loss/gen": 0.11224167793989182, "loss/real": 0.27741315960884094, "rewards/accuracies": 1.0, "rewards/generated": -458.61083984375, "rewards/margins": 442.43212890625, "rewards/real": -16.178741455078125, "step": 167 }, { "epoch": 0.35, "grad_norm": 35.26097942374651, "learning_rate": 4.099182553897228e-07, "logits/generated": 3.541701555252075, "logits/oppo_generated": -2.703266143798828, "logits/oppo_real": -2.672393798828125, "logits/real": -1.2182093858718872, "logps/generated": -678.5563354492188, "logps/oppo_gen": -94.5416259765625, "logps/oppo_real": -463.65789794921875, "logps/real": -437.16644287109375, "loss": 0.3667, "loss/gen": 0.08236212283372879, "loss/real": 0.1626996248960495, "rewards/accuracies": 1.0, "rewards/generated": -584.0147094726562, "rewards/margins": 610.506103515625, "rewards/real": 26.491458892822266, "step": 168 }, { "epoch": 0.35, "grad_norm": 29.281886791034637, "learning_rate": 4.0851006017946945e-07, "logits/generated": 5.741336822509766, "logits/oppo_generated": -2.6494524478912354, "logits/oppo_real": -2.6128625869750977, "logits/real": -0.31815677881240845, "logps/generated": -608.6373291015625, "logps/oppo_gen": -66.14505004882812, "logps/oppo_real": -303.870361328125, "logps/real": -288.19305419921875, "loss": 0.3213, "loss/gen": 0.06941390037536621, "loss/real": 0.1854477822780609, "rewards/accuracies": 1.0, "rewards/generated": -542.4923095703125, "rewards/margins": 558.1696166992188, "rewards/real": 15.677297592163086, "step": 169 }, { "epoch": 0.36, "grad_norm": 40.914190047777126, "learning_rate": 4.070934040463998e-07, "logits/generated": 2.282456398010254, "logits/oppo_generated": -2.586618423461914, "logits/oppo_real": -2.638277292251587, "logits/real": 1.154468059539795, "logps/generated": -638.625732421875, "logps/oppo_gen": -84.69650268554688, "logps/oppo_real": -285.56890869140625, "logps/real": -310.78338623046875, "loss": 0.3753, "loss/gen": 0.15161314606666565, "loss/real": 0.3878932595252991, "rewards/accuracies": 0.9375, "rewards/generated": -553.9293212890625, "rewards/margins": 528.71484375, "rewards/real": -25.21445083618164, "step": 170 }, { "epoch": 0.36, "grad_norm": 46.814385288305076, "learning_rate": 4.056683626085422e-07, "logits/generated": 3.1288087368011475, "logits/oppo_generated": -2.861464500427246, "logits/oppo_real": -3.037282943725586, "logits/real": 0.27654290199279785, "logps/generated": -743.48583984375, "logps/oppo_gen": -72.53483581542969, "logps/oppo_real": -316.06658935546875, "logps/real": -325.1678771972656, "loss": 0.4359, "loss/gen": 0.02361534722149372, "loss/real": 0.35334840416908264, "rewards/accuracies": 0.9375, "rewards/generated": -670.9510498046875, "rewards/margins": 661.8497314453125, "rewards/real": -9.101311683654785, "step": 171 }, { "epoch": 0.36, "grad_norm": 33.77107812571931, "learning_rate": 4.042350119315141e-07, "logits/generated": 3.522606372833252, "logits/oppo_generated": -2.546668529510498, "logits/oppo_real": -2.747616767883301, "logits/real": 1.465965986251831, "logps/generated": -586.4928588867188, "logps/oppo_gen": -50.940589904785156, "logps/oppo_real": -195.0120849609375, "logps/real": -198.96145629882812, "loss": 0.4098, "loss/gen": 0.13806982338428497, "loss/real": 0.2442954182624817, "rewards/accuracies": 0.9375, "rewards/generated": -535.55224609375, "rewards/margins": 531.6029052734375, "rewards/real": -3.949376106262207, "step": 172 }, { "epoch": 0.36, "grad_norm": 47.288798554731756, "learning_rate": 4.027934285244623e-07, "logits/generated": 2.3074827194213867, "logits/oppo_generated": -2.7699496746063232, "logits/oppo_real": -2.825406074523926, "logits/real": -0.9217118620872498, "logps/generated": -621.0782470703125, "logps/oppo_gen": -115.70794677734375, "logps/oppo_real": -436.4163818359375, "logps/real": -410.5096435546875, "loss": 0.3803, "loss/gen": 0.22207143902778625, "loss/real": 0.1597864329814911, "rewards/accuracies": 1.0, "rewards/generated": -505.3702392578125, "rewards/margins": 531.277099609375, "rewards/real": 25.90674591064453, "step": 173 }, { "epoch": 0.36, "grad_norm": 38.79185460117159, "learning_rate": 4.0134368933597864e-07, "logits/generated": 2.8078434467315674, "logits/oppo_generated": -2.8050785064697266, "logits/oppo_real": -2.6773767471313477, "logits/real": -1.348842740058899, "logps/generated": -618.10205078125, "logps/oppo_gen": -84.29270935058594, "logps/oppo_real": -391.57928466796875, "logps/real": -382.04180908203125, "loss": 0.3099, "loss/gen": 0.16892102360725403, "loss/real": 0.18786463141441345, "rewards/accuracies": 1.0, "rewards/generated": -533.8093872070312, "rewards/margins": 543.3468017578125, "rewards/real": 9.537463188171387, "step": 174 }, { "epoch": 0.37, "grad_norm": 53.66757624706351, "learning_rate": 3.9988587174999306e-07, "logits/generated": 2.588200569152832, "logits/oppo_generated": -3.0364410877227783, "logits/oppo_real": -2.832014560699463, "logits/real": -2.086704730987549, "logps/generated": -659.8938598632812, "logps/oppo_gen": -103.90766906738281, "logps/oppo_real": -399.5452575683594, "logps/real": -396.6692810058594, "loss": 0.3727, "loss/gen": 0.04421517252922058, "loss/real": 0.24986904859542847, "rewards/accuracies": 1.0, "rewards/generated": -555.9861450195312, "rewards/margins": 558.8621826171875, "rewards/real": 2.875990867614746, "step": 175 }, { "epoch": 0.37, "grad_norm": 21.4714961398682, "learning_rate": 3.9842005358164267e-07, "logits/generated": 3.0104498863220215, "logits/oppo_generated": -2.735055446624756, "logits/oppo_real": -2.919309139251709, "logits/real": -0.7276687026023865, "logps/generated": -599.495849609375, "logps/oppo_gen": -73.57182312011719, "logps/oppo_real": -302.60333251953125, "logps/real": -301.95623779296875, "loss": 0.3197, "loss/gen": 0.041153088212013245, "loss/real": 0.21765488386154175, "rewards/accuracies": 1.0, "rewards/generated": -525.924072265625, "rewards/margins": 526.571044921875, "rewards/real": 0.6470375061035156, "step": 176 }, { "epoch": 0.37, "grad_norm": 54.03969669011268, "learning_rate": 3.9694631307311825e-07, "logits/generated": 4.6008453369140625, "logits/oppo_generated": -2.830564022064209, "logits/oppo_real": -2.691429376602173, "logits/real": 0.8001154065132141, "logps/generated": -592.5567626953125, "logps/oppo_gen": -68.98664093017578, "logps/oppo_real": -178.04420471191406, "logps/real": -268.3450927734375, "loss": 0.4679, "loss/gen": 0.1762559711933136, "loss/real": 0.8209776878356934, "rewards/accuracies": 1.0, "rewards/generated": -523.5701293945312, "rewards/margins": 433.269287109375, "rewards/real": -90.3008804321289, "step": 177 }, { "epoch": 0.37, "grad_norm": 36.03140584311484, "learning_rate": 3.954647288894882e-07, "logits/generated": 1.2684483528137207, "logits/oppo_generated": -2.752481698989868, "logits/oppo_real": -2.8730828762054443, "logits/real": -0.48670902848243713, "logps/generated": -603.163330078125, "logps/oppo_gen": -70.56501770019531, "logps/oppo_real": -325.8918762207031, "logps/real": -301.71539306640625, "loss": 0.3475, "loss/gen": 0.11980742961168289, "loss/real": 0.16193076968193054, "rewards/accuracies": 1.0, "rewards/generated": -532.5982666015625, "rewards/margins": 556.7747192382812, "rewards/real": 24.176467895507812, "step": 178 }, { "epoch": 0.37, "grad_norm": 24.202740424236442, "learning_rate": 3.9397538011449896e-07, "logits/generated": 4.160590171813965, "logits/oppo_generated": -2.6507742404937744, "logits/oppo_real": -2.5858640670776367, "logits/real": 0.4383518695831299, "logps/generated": -581.2891845703125, "logps/oppo_gen": -63.321895599365234, "logps/oppo_real": -241.67031860351562, "logps/real": -224.41864013671875, "loss": 0.3749, "loss/gen": 0.17947952449321747, "loss/real": 0.175423726439476, "rewards/accuracies": 1.0, "rewards/generated": -517.96728515625, "rewards/margins": 535.218994140625, "rewards/real": 17.251686096191406, "step": 179 }, { "epoch": 0.38, "grad_norm": 40.32214788861777, "learning_rate": 3.9247834624635404e-07, "logits/generated": 5.015523433685303, "logits/oppo_generated": -2.7281503677368164, "logits/oppo_real": -2.559065818786621, "logits/real": 0.1538265496492386, "logps/generated": -683.5272827148438, "logps/oppo_gen": -62.72259521484375, "logps/oppo_real": -167.6991729736328, "logps/real": -162.67086791992188, "loss": 0.3274, "loss/gen": 0.0464489683508873, "loss/real": 0.20436066389083862, "rewards/accuracies": 1.0, "rewards/generated": -620.8046875, "rewards/margins": 625.8330078125, "rewards/real": 5.028313636779785, "step": 180 }, { "epoch": 0.38, "grad_norm": 39.37261122466273, "learning_rate": 3.9097370719347065e-07, "logits/generated": 2.0698001384735107, "logits/oppo_generated": -2.910065174102783, "logits/oppo_real": -2.79630970954895, "logits/real": -0.8888975381851196, "logps/generated": -638.67626953125, "logps/oppo_gen": -88.21836853027344, "logps/oppo_real": -352.46441650390625, "logps/real": -384.6054382324219, "loss": 0.3955, "loss/gen": 0.05475688725709915, "loss/real": 0.3640737533569336, "rewards/accuracies": 0.9375, "rewards/generated": -550.4578857421875, "rewards/margins": 518.31689453125, "rewards/real": -32.14099884033203, "step": 181 }, { "epoch": 0.38, "grad_norm": 46.871757700366594, "learning_rate": 3.894615432702143e-07, "logits/generated": 3.8333396911621094, "logits/oppo_generated": -2.741304397583008, "logits/oppo_real": -2.577056884765625, "logits/real": -1.3356674909591675, "logps/generated": -626.2735595703125, "logps/oppo_gen": -113.4937744140625, "logps/oppo_real": -357.3503723144531, "logps/real": -327.42108154296875, "loss": 0.3519, "loss/gen": 0.07857067883014679, "loss/real": 0.16131818294525146, "rewards/accuracies": 1.0, "rewards/generated": -512.7797241210938, "rewards/margins": 542.7090454101562, "rewards/real": 29.929283142089844, "step": 182 }, { "epoch": 0.38, "grad_norm": 23.50506796873482, "learning_rate": 3.879419351926115e-07, "logits/generated": 1.8759452104568481, "logits/oppo_generated": -2.8626461029052734, "logits/oppo_real": -2.836373805999756, "logits/real": -1.6114048957824707, "logps/generated": -601.630126953125, "logps/oppo_gen": -81.16427612304688, "logps/oppo_real": -371.1536865234375, "logps/real": -348.64056396484375, "loss": 0.3708, "loss/gen": 0.0639682188630104, "loss/real": 0.16622665524482727, "rewards/accuracies": 1.0, "rewards/generated": -520.4658203125, "rewards/margins": 542.9788818359375, "rewards/real": 22.513111114501953, "step": 183 }, { "epoch": 0.38, "grad_norm": 36.387666260140605, "learning_rate": 3.864149640740416e-07, "logits/generated": 0.818359375, "logits/oppo_generated": -2.6479756832122803, "logits/oppo_real": -2.969878673553467, "logits/real": -0.3535976707935333, "logps/generated": -607.1737060546875, "logps/oppo_gen": -84.10977172851562, "logps/oppo_real": -309.2869873046875, "logps/real": -311.79852294921875, "loss": 0.3416, "loss/gen": 0.15428604185581207, "loss/real": 0.2501806616783142, "rewards/accuracies": 1.0, "rewards/generated": -523.06396484375, "rewards/margins": 520.552490234375, "rewards/real": -2.511505126953125, "step": 184 }, { "epoch": 0.39, "grad_norm": 37.67410429944719, "learning_rate": 3.848807114209074e-07, "logits/generated": 3.9288644790649414, "logits/oppo_generated": -2.626985549926758, "logits/oppo_real": -2.517353057861328, "logits/real": -0.48309874534606934, "logps/generated": -566.0116577148438, "logps/oppo_gen": -63.896446228027344, "logps/oppo_real": -215.14120483398438, "logps/real": -218.39749145507812, "loss": 0.3856, "loss/gen": 0.05431937426328659, "loss/real": 0.25885841250419617, "rewards/accuracies": 1.0, "rewards/generated": -502.115234375, "rewards/margins": 498.85894775390625, "rewards/real": -3.2562923431396484, "step": 185 }, { "epoch": 0.39, "grad_norm": 29.94640904266789, "learning_rate": 3.833392591282838e-07, "logits/generated": 2.5921592712402344, "logits/oppo_generated": -2.8032891750335693, "logits/oppo_real": -2.858144760131836, "logits/real": 0.4319887161254883, "logps/generated": -600.787353515625, "logps/oppo_gen": -70.31141662597656, "logps/oppo_real": -278.02667236328125, "logps/real": -309.3092041015625, "loss": 0.3872, "loss/gen": 0.16086134314537048, "loss/real": 0.4028547406196594, "rewards/accuracies": 0.9375, "rewards/generated": -530.4760131835938, "rewards/margins": 499.19342041015625, "rewards/real": -31.282567977905273, "step": 186 }, { "epoch": 0.39, "grad_norm": 32.256352708250546, "learning_rate": 3.8179068947554705e-07, "logits/generated": 3.833861827850342, "logits/oppo_generated": -2.8886849880218506, "logits/oppo_real": -2.736198902130127, "logits/real": -0.7970997095108032, "logps/generated": -652.3579711914062, "logps/oppo_gen": -80.37522888183594, "logps/oppo_real": -261.8301086425781, "logps/real": -263.4853515625, "loss": 0.4055, "loss/gen": 0.05830386281013489, "loss/real": 0.27234482765197754, "rewards/accuracies": 1.0, "rewards/generated": -571.9827880859375, "rewards/margins": 570.3275756835938, "rewards/real": -1.6552231311798096, "step": 187 }, { "epoch": 0.39, "grad_norm": 49.28620130089024, "learning_rate": 3.8023508512198257e-07, "logits/generated": 4.238766193389893, "logits/oppo_generated": -2.9484691619873047, "logits/oppo_real": -2.766463041305542, "logits/real": -1.860574722290039, "logps/generated": -593.7321166992188, "logps/oppo_gen": -76.98384094238281, "logps/oppo_real": -369.90545654296875, "logps/real": -355.2281494140625, "loss": 0.3579, "loss/gen": 0.053326621651649475, "loss/real": 0.19532784819602966, "rewards/accuracies": 1.0, "rewards/generated": -516.748291015625, "rewards/margins": 531.4256591796875, "rewards/real": 14.677343368530273, "step": 188 }, { "epoch": 0.4, "grad_norm": 25.22844962359273, "learning_rate": 3.786725291023728e-07, "logits/generated": 1.9840850830078125, "logits/oppo_generated": -2.7869467735290527, "logits/oppo_real": -2.9065823554992676, "logits/real": 0.32308337092399597, "logps/generated": -601.5640869140625, "logps/oppo_gen": -69.37512969970703, "logps/oppo_real": -174.45965576171875, "logps/real": -216.3216094970703, "loss": 0.373, "loss/gen": 0.06400664150714874, "loss/real": 0.4876205623149872, "rewards/accuracies": 1.0, "rewards/generated": -532.18896484375, "rewards/margins": 490.3270263671875, "rewards/real": -41.861942291259766, "step": 189 }, { "epoch": 0.4, "grad_norm": 36.253108029476245, "learning_rate": 3.7710310482256523e-07, "logits/generated": 4.182609558105469, "logits/oppo_generated": -2.7686333656311035, "logits/oppo_real": -2.657388687133789, "logits/real": -0.6948004961013794, "logps/generated": -576.32470703125, "logps/oppo_gen": -78.72369384765625, "logps/oppo_real": -240.1993408203125, "logps/real": -226.7103729248047, "loss": 0.3604, "loss/gen": 0.044823385775089264, "loss/real": 0.18352998793125153, "rewards/accuracies": 1.0, "rewards/generated": -497.60101318359375, "rewards/margins": 511.0899658203125, "rewards/real": 13.4889554977417, "step": 190 }, { "epoch": 0.4, "grad_norm": 26.63954589090132, "learning_rate": 3.7552689605501986e-07, "logits/generated": 3.2763257026672363, "logits/oppo_generated": -2.657681941986084, "logits/oppo_real": -2.8181610107421875, "logits/real": 0.7460372447967529, "logps/generated": -595.9085693359375, "logps/oppo_gen": -73.89129638671875, "logps/oppo_real": -256.21337890625, "logps/real": -287.32672119140625, "loss": 0.3531, "loss/gen": 0.07981202751398087, "loss/real": 0.4028546214103699, "rewards/accuracies": 1.0, "rewards/generated": -522.017333984375, "rewards/margins": 490.9039306640625, "rewards/real": -31.113351821899414, "step": 191 }, { "epoch": 0.4, "grad_norm": 31.893125399588957, "learning_rate": 3.7394398693433794e-07, "logits/generated": 3.7813591957092285, "logits/oppo_generated": -2.6394004821777344, "logits/oppo_real": -2.7031853199005127, "logits/real": -1.6782422065734863, "logps/generated": -640.29931640625, "logps/oppo_gen": -71.51564025878906, "logps/oppo_real": -315.9356994628906, "logps/real": -289.91998291015625, "loss": 0.3326, "loss/gen": 0.09133920818567276, "loss/real": 0.16474120318889618, "rewards/accuracies": 1.0, "rewards/generated": -568.78369140625, "rewards/margins": 594.7993774414062, "rewards/real": 26.015716552734375, "step": 192 }, { "epoch": 0.4, "grad_norm": 29.62806962604255, "learning_rate": 3.7235446195277136e-07, "logits/generated": 2.645258665084839, "logits/oppo_generated": -2.579573154449463, "logits/oppo_real": -2.747130870819092, "logits/real": 0.6542664766311646, "logps/generated": -572.8811645507812, "logps/oppo_gen": -65.53421020507812, "logps/oppo_real": -227.9467010498047, "logps/real": -214.3878936767578, "loss": 0.2953, "loss/gen": 0.07026851177215576, "loss/real": 0.2094188630580902, "rewards/accuracies": 1.0, "rewards/generated": -507.346923828125, "rewards/margins": 520.90576171875, "rewards/real": 13.558823585510254, "step": 193 }, { "epoch": 0.41, "grad_norm": 34.85229715797081, "learning_rate": 3.7075840595571194e-07, "logits/generated": 3.5314037799835205, "logits/oppo_generated": -2.5842761993408203, "logits/oppo_real": -2.4687318801879883, "logits/real": 0.21015453338623047, "logps/generated": -634.6973876953125, "logps/oppo_gen": -81.10606384277344, "logps/oppo_real": -293.9441833496094, "logps/real": -280.90740966796875, "loss": 0.3575, "loss/gen": 0.03518790006637573, "loss/real": 0.19670113921165466, "rewards/accuracies": 1.0, "rewards/generated": -553.59130859375, "rewards/margins": 566.628173828125, "rewards/real": 13.03675651550293, "step": 194 }, { "epoch": 0.41, "grad_norm": 23.25440855171001, "learning_rate": 3.691559041371631e-07, "logits/generated": 4.113855838775635, "logits/oppo_generated": -2.7571568489074707, "logits/oppo_real": -2.7542152404785156, "logits/real": -0.6659407615661621, "logps/generated": -763.6599731445312, "logps/oppo_gen": -82.74861145019531, "logps/oppo_real": -304.05267333984375, "logps/real": -311.4844970703125, "loss": 0.2873, "loss/gen": 0.03204440325498581, "loss/real": 0.2777453064918518, "rewards/accuracies": 1.0, "rewards/generated": -680.911376953125, "rewards/margins": 673.4795532226562, "rewards/real": -7.431818962097168, "step": 195 }, { "epoch": 0.41, "grad_norm": 39.75584359071475, "learning_rate": 3.6754704203519204e-07, "logits/generated": 2.313502788543701, "logits/oppo_generated": -2.8141732215881348, "logits/oppo_real": -2.7166080474853516, "logits/real": -0.3117219805717468, "logps/generated": -692.5143432617188, "logps/oppo_gen": -85.08738708496094, "logps/oppo_real": -330.571533203125, "logps/real": -318.1523742675781, "loss": 0.353, "loss/gen": 0.027993863448500633, "loss/real": 0.19043594598770142, "rewards/accuracies": 1.0, "rewards/generated": -607.4268798828125, "rewards/margins": 619.8460693359375, "rewards/real": 12.419178009033203, "step": 196 }, { "epoch": 0.41, "grad_norm": 30.445406722974546, "learning_rate": 3.659319055273644e-07, "logits/generated": 1.316972255706787, "logits/oppo_generated": -2.779865264892578, "logits/oppo_real": -2.7747716903686523, "logits/real": -1.5569334030151367, "logps/generated": -624.56494140625, "logps/oppo_gen": -87.4921646118164, "logps/oppo_real": -331.7750244140625, "logps/real": -331.03033447265625, "loss": 0.3985, "loss/gen": 0.06687048077583313, "loss/real": 0.3081396222114563, "rewards/accuracies": 1.0, "rewards/generated": -537.07275390625, "rewards/margins": 537.8175048828125, "rewards/real": 0.7447280883789062, "step": 197 }, { "epoch": 0.41, "grad_norm": 32.295338542916106, "learning_rate": 3.643105808261596e-07, "logits/generated": 1.9061369895935059, "logits/oppo_generated": -2.7027125358581543, "logits/oppo_real": -2.902536392211914, "logits/real": -0.43068477511405945, "logps/generated": -520.6417846679688, "logps/oppo_gen": -55.610694885253906, "logps/oppo_real": -240.93789672851562, "logps/real": -245.14500427246094, "loss": 0.3222, "loss/gen": 0.1180552989244461, "loss/real": 0.30835291743278503, "rewards/accuracies": 0.9375, "rewards/generated": -465.03106689453125, "rewards/margins": 460.823974609375, "rewards/real": -4.207094192504883, "step": 198 }, { "epoch": 0.42, "grad_norm": 34.458162896973704, "learning_rate": 3.626831544743697e-07, "logits/generated": 2.6215648651123047, "logits/oppo_generated": -2.5582261085510254, "logits/oppo_real": -2.8184330463409424, "logits/real": -1.3916691541671753, "logps/generated": -576.6791381835938, "logps/oppo_gen": -63.67759704589844, "logps/oppo_real": -285.08526611328125, "logps/real": -287.012939453125, "loss": 0.4403, "loss/gen": 0.15199309587478638, "loss/real": 0.25518083572387695, "rewards/accuracies": 1.0, "rewards/generated": -513.00146484375, "rewards/margins": 511.0738525390625, "rewards/real": -1.9276609420776367, "step": 199 }, { "epoch": 0.42, "grad_norm": 22.067206639869912, "learning_rate": 3.610497133404795e-07, "logits/generated": 2.2441086769104004, "logits/oppo_generated": -2.7718663215637207, "logits/oppo_real": -2.604949951171875, "logits/real": -1.599900722503662, "logps/generated": -721.7673950195312, "logps/oppo_gen": -86.70479583740234, "logps/oppo_real": -356.1251220703125, "logps/real": -336.54754638671875, "loss": 0.3413, "loss/gen": 0.0732014924287796, "loss/real": 0.17360037565231323, "rewards/accuracies": 1.0, "rewards/generated": -635.0626220703125, "rewards/margins": 654.64013671875, "rewards/real": 19.577552795410156, "step": 200 }, { "epoch": 0.42, "grad_norm": 23.872160897538553, "learning_rate": 3.594103446140297e-07, "logits/generated": 1.501354455947876, "logits/oppo_generated": -2.8155646324157715, "logits/oppo_real": -2.766092538833618, "logits/real": -1.9259686470031738, "logps/generated": -654.544189453125, "logps/oppo_gen": -73.81175231933594, "logps/oppo_real": -317.19415283203125, "logps/real": -305.828857421875, "loss": 0.345, "loss/gen": 0.13120338320732117, "loss/real": 0.18511003255844116, "rewards/accuracies": 1.0, "rewards/generated": -580.7324829101562, "rewards/margins": 592.0977783203125, "rewards/real": 11.36532974243164, "step": 201 }, { "epoch": 0.42, "grad_norm": 37.714581376935634, "learning_rate": 3.5776513580096313e-07, "logits/generated": 1.715670108795166, "logits/oppo_generated": -2.750485420227051, "logits/oppo_real": -2.715085983276367, "logits/real": -1.2266333103179932, "logps/generated": -681.1434936523438, "logps/oppo_gen": -89.23578643798828, "logps/oppo_real": -368.6956481933594, "logps/real": -347.9045104980469, "loss": 0.3083, "loss/gen": 0.0492560900747776, "loss/real": 0.1700318455696106, "rewards/accuracies": 1.0, "rewards/generated": -591.90771484375, "rewards/margins": 612.6989135742188, "rewards/real": 20.7911376953125, "step": 202 }, { "epoch": 0.42, "grad_norm": 34.95580007644475, "learning_rate": 3.5611417471895376e-07, "logits/generated": 2.565880298614502, "logits/oppo_generated": -2.6933023929595947, "logits/oppo_real": -2.7496652603149414, "logits/real": -0.9516716599464417, "logps/generated": -719.45947265625, "logps/oppo_gen": -51.82988739013672, "logps/oppo_real": -141.92869567871094, "logps/real": -167.80970764160156, "loss": 0.3703, "loss/gen": 0.06500092148780823, "loss/real": 0.403279185295105, "rewards/accuracies": 1.0, "rewards/generated": -667.629638671875, "rewards/margins": 641.7485961914062, "rewards/real": -25.881032943725586, "step": 203 }, { "epoch": 0.43, "grad_norm": 52.17167377918668, "learning_rate": 3.5445754949271924e-07, "logits/generated": 2.1220145225524902, "logits/oppo_generated": -2.9500808715820312, "logits/oppo_real": -2.7065343856811523, "logits/real": -2.146017551422119, "logps/generated": -680.4698486328125, "logps/oppo_gen": -90.31026458740234, "logps/oppo_real": -439.34735107421875, "logps/real": -405.097900390625, "loss": 0.43, "loss/gen": 0.11432987451553345, "loss/real": 0.14855104684829712, "rewards/accuracies": 1.0, "rewards/generated": -590.1595458984375, "rewards/margins": 624.4090576171875, "rewards/real": 34.24946594238281, "step": 204 }, { "epoch": 0.43, "grad_norm": 42.23183145318973, "learning_rate": 3.5279534854931674e-07, "logits/generated": 2.65057373046875, "logits/oppo_generated": -2.7080841064453125, "logits/oppo_real": -2.7903661727905273, "logits/real": -0.8641007542610168, "logps/generated": -726.29443359375, "logps/oppo_gen": -78.16770935058594, "logps/oppo_real": -221.12213134765625, "logps/real": -214.3006134033203, "loss": 0.36, "loss/gen": 0.03191296383738518, "loss/real": 0.19759435951709747, "rewards/accuracies": 1.0, "rewards/generated": -648.126708984375, "rewards/margins": 654.9482421875, "rewards/real": 6.82151460647583, "step": 205 }, { "epoch": 0.43, "grad_norm": 44.82334166489389, "learning_rate": 3.511276606134234e-07, "logits/generated": 2.2213454246520996, "logits/oppo_generated": -2.7255749702453613, "logits/oppo_real": -2.866260528564453, "logits/real": -1.4539179801940918, "logps/generated": -683.35986328125, "logps/oppo_gen": -71.52497863769531, "logps/oppo_real": -245.4722900390625, "logps/real": -217.90753173828125, "loss": 0.3083, "loss/gen": 0.02118522860109806, "loss/real": 0.1588922142982483, "rewards/accuracies": 1.0, "rewards/generated": -611.8348999023438, "rewards/margins": 639.3995971679688, "rewards/real": 27.564727783203125, "step": 206 }, { "epoch": 0.43, "grad_norm": 89.52327625923607, "learning_rate": 3.4945457470259987e-07, "logits/generated": 2.679196834564209, "logits/oppo_generated": -2.712146759033203, "logits/oppo_real": -2.5725181102752686, "logits/real": -1.3774299621582031, "logps/generated": -809.5963745117188, "logps/oppo_gen": -95.62235260009766, "logps/oppo_real": -342.45953369140625, "logps/real": -347.9728088378906, "loss": 0.3979, "loss/gen": 0.012037093751132488, "loss/real": 0.31207871437072754, "rewards/accuracies": 0.9375, "rewards/generated": -713.9739990234375, "rewards/margins": 708.460693359375, "rewards/real": -5.513291358947754, "step": 207 }, { "epoch": 0.44, "grad_norm": 33.33843385456191, "learning_rate": 3.4777618012253895e-07, "logits/generated": 0.3462332487106323, "logits/oppo_generated": -2.733717441558838, "logits/oppo_real": -2.6825289726257324, "logits/real": -1.5442677736282349, "logps/generated": -749.8042602539062, "logps/oppo_gen": -92.93001556396484, "logps/oppo_real": -297.9956359863281, "logps/real": -312.69561767578125, "loss": 0.3289, "loss/gen": 0.025040730834007263, "loss/real": 0.3518640995025635, "rewards/accuracies": 0.9375, "rewards/generated": -656.874267578125, "rewards/margins": 642.1741943359375, "rewards/real": -14.699989318847656, "step": 208 }, { "epoch": 0.44, "grad_norm": 32.84499135380081, "learning_rate": 3.4609256646229903e-07, "logits/generated": 0.9510908126831055, "logits/oppo_generated": -2.6556482315063477, "logits/oppo_real": -2.5293190479278564, "logits/real": -1.8658448457717896, "logps/generated": -683.0816650390625, "logps/oppo_gen": -99.02784729003906, "logps/oppo_real": -402.88189697265625, "logps/real": -354.7418212890625, "loss": 0.2956, "loss/gen": 0.06923627853393555, "loss/real": 0.1563805490732193, "rewards/accuracies": 1.0, "rewards/generated": -584.0538330078125, "rewards/margins": 632.1939697265625, "rewards/real": 48.140071868896484, "step": 209 }, { "epoch": 0.44, "grad_norm": 49.95857170415053, "learning_rate": 3.4440382358952115e-07, "logits/generated": 0.15448346734046936, "logits/oppo_generated": -2.720691680908203, "logits/oppo_real": -2.57096529006958, "logits/real": -1.851238489151001, "logps/generated": -605.6126708984375, "logps/oppo_gen": -92.30026245117188, "logps/oppo_real": -256.4493408203125, "logps/real": -253.73361206054688, "loss": 0.3509, "loss/gen": 0.21256473660469055, "loss/real": 0.2324458658695221, "rewards/accuracies": 1.0, "rewards/generated": -513.3124389648438, "rewards/margins": 516.0281372070312, "rewards/real": 2.71573543548584, "step": 210 }, { "epoch": 0.44, "grad_norm": 36.08240825591988, "learning_rate": 3.4271004164563294e-07, "logits/generated": -0.19886772334575653, "logits/oppo_generated": -2.9380054473876953, "logits/oppo_real": -2.9733734130859375, "logits/real": -2.3697614669799805, "logps/generated": -543.4454956054688, "logps/oppo_gen": -72.3819808959961, "logps/oppo_real": -333.6939697265625, "logps/real": -319.1031799316406, "loss": 0.3138, "loss/gen": 0.3100494146347046, "loss/real": 0.18639668822288513, "rewards/accuracies": 1.0, "rewards/generated": -471.06353759765625, "rewards/margins": 485.65435791015625, "rewards/real": 14.590840339660645, "step": 211 }, { "epoch": 0.44, "grad_norm": 32.91568040617667, "learning_rate": 3.410113110410366e-07, "logits/generated": 0.19806131720542908, "logits/oppo_generated": -2.8069820404052734, "logits/oppo_real": -2.969247341156006, "logits/real": -2.11154842376709, "logps/generated": -629.618896484375, "logps/oppo_gen": -69.23855590820312, "logps/oppo_real": -293.89312744140625, "logps/real": -266.492431640625, "loss": 0.3423, "loss/gen": 0.048031456768512726, "loss/real": 0.15945301949977875, "rewards/accuracies": 1.0, "rewards/generated": -560.38037109375, "rewards/margins": 587.7811279296875, "rewards/real": 27.400691986083984, "step": 212 }, { "epoch": 0.45, "grad_norm": 38.56999521973007, "learning_rate": 3.3930772245028317e-07, "logits/generated": 1.8033071756362915, "logits/oppo_generated": -2.7724642753601074, "logits/oppo_real": -2.654409885406494, "logits/real": -1.6350951194763184, "logps/generated": -644.2208251953125, "logps/oppo_gen": -77.64331817626953, "logps/oppo_real": -251.16159057617188, "logps/real": -259.20703125, "loss": 0.3801, "loss/gen": 0.061860501766204834, "loss/real": 0.3248833417892456, "rewards/accuracies": 1.0, "rewards/generated": -566.5775146484375, "rewards/margins": 558.5321044921875, "rewards/real": -8.045448303222656, "step": 213 }, { "epoch": 0.45, "grad_norm": 27.540038768156492, "learning_rate": 3.3759936680723233e-07, "logits/generated": 1.1407092809677124, "logits/oppo_generated": -2.727145195007324, "logits/oppo_real": -2.6046769618988037, "logits/real": -1.5037585496902466, "logps/generated": -647.4967041015625, "logps/oppo_gen": -80.66487121582031, "logps/oppo_real": -269.0631408691406, "logps/real": -247.17236328125, "loss": 0.3, "loss/gen": 0.13781431317329407, "loss/real": 0.25224390625953674, "rewards/accuracies": 1.0, "rewards/generated": -566.8319091796875, "rewards/margins": 588.72265625, "rewards/real": 21.890777587890625, "step": 214 }, { "epoch": 0.45, "grad_norm": 34.52746196700262, "learning_rate": 3.3588633530019866e-07, "logits/generated": 1.7109321355819702, "logits/oppo_generated": -2.8297104835510254, "logits/oppo_real": -2.8279013633728027, "logits/real": -1.996672511100769, "logps/generated": -689.519775390625, "logps/oppo_gen": -81.06492614746094, "logps/oppo_real": -267.75341796875, "logps/real": -262.6698913574219, "loss": 0.3261, "loss/gen": 0.15056224167346954, "loss/real": 0.2486058473587036, "rewards/accuracies": 1.0, "rewards/generated": -608.454833984375, "rewards/margins": 613.5383911132812, "rewards/real": 5.083520889282227, "step": 215 }, { "epoch": 0.45, "grad_norm": 30.925401082749215, "learning_rate": 3.341687193670843e-07, "logits/generated": 1.0987327098846436, "logits/oppo_generated": -2.6814069747924805, "logits/oppo_real": -3.02249813079834, "logits/real": -1.2881546020507812, "logps/generated": -674.28515625, "logps/oppo_gen": -64.46792602539062, "logps/oppo_real": -302.4736328125, "logps/real": -339.430419921875, "loss": 0.3165, "loss/gen": 0.03633798286318779, "loss/real": 0.5284652709960938, "rewards/accuracies": 1.0, "rewards/generated": -609.8171997070312, "rewards/margins": 572.8603515625, "rewards/real": -36.95682907104492, "step": 216 }, { "epoch": 0.45, "grad_norm": 50.21994106397129, "learning_rate": 3.3244661069049806e-07, "logits/generated": 1.24822998046875, "logits/oppo_generated": -2.7858104705810547, "logits/oppo_real": -2.6971521377563477, "logits/real": -1.740341067314148, "logps/generated": -610.5947265625, "logps/oppo_gen": -71.21235656738281, "logps/oppo_real": -320.8017578125, "logps/real": -311.95843505859375, "loss": 0.3234, "loss/gen": 0.07200797647237778, "loss/real": 0.25545698404312134, "rewards/accuracies": 1.0, "rewards/generated": -539.38232421875, "rewards/margins": 548.2257080078125, "rewards/real": 8.84335708618164, "step": 217 }, { "epoch": 0.46, "grad_norm": 49.23734271839566, "learning_rate": 3.3072010119286155e-07, "logits/generated": 1.5630497932434082, "logits/oppo_generated": -2.6927084922790527, "logits/oppo_real": -2.880934715270996, "logits/real": -1.2201228141784668, "logps/generated": -623.26708984375, "logps/oppo_gen": -74.21060180664062, "logps/oppo_real": -299.62982177734375, "logps/real": -348.39190673828125, "loss": 0.4195, "loss/gen": 0.14255878329277039, "loss/real": 0.5940058827400208, "rewards/accuracies": 0.875, "rewards/generated": -549.0565185546875, "rewards/margins": 500.2944030761719, "rewards/real": -48.7620964050293, "step": 218 }, { "epoch": 0.46, "grad_norm": 22.2264169591017, "learning_rate": 3.289892830315028e-07, "logits/generated": 4.247753620147705, "logits/oppo_generated": -2.771029472351074, "logits/oppo_real": -2.5227415561676025, "logits/real": -1.2131158113479614, "logps/generated": -623.5982666015625, "logps/oppo_gen": -62.89678955078125, "logps/oppo_real": -163.27670288085938, "logps/real": -179.0076141357422, "loss": 0.3323, "loss/gen": 0.07575614750385284, "loss/real": 0.32401931285858154, "rewards/accuracies": 1.0, "rewards/generated": -560.7014770507812, "rewards/margins": 544.9705810546875, "rewards/real": -15.730911254882812, "step": 219 }, { "epoch": 0.46, "grad_norm": 27.218516790045705, "learning_rate": 3.272542485937368e-07, "logits/generated": -0.3621227741241455, "logits/oppo_generated": -2.86175799369812, "logits/oppo_real": -2.841768503189087, "logits/real": -2.256178855895996, "logps/generated": -679.428466796875, "logps/oppo_gen": -74.02748107910156, "logps/oppo_real": -279.630859375, "logps/real": -263.0206298828125, "loss": 0.3413, "loss/gen": 0.11687298119068146, "loss/real": 0.18065857887268066, "rewards/accuracies": 1.0, "rewards/generated": -605.4010620117188, "rewards/margins": 622.0112915039062, "rewards/real": 16.610258102416992, "step": 220 }, { "epoch": 0.46, "grad_norm": 26.52582141348607, "learning_rate": 3.2551509049193444e-07, "logits/generated": -0.09197130799293518, "logits/oppo_generated": -2.889202356338501, "logits/oppo_real": -2.841768741607666, "logits/real": -2.298105001449585, "logps/generated": -709.23681640625, "logps/oppo_gen": -96.27604675292969, "logps/oppo_real": -258.4027099609375, "logps/real": -264.6490173339844, "loss": 0.3246, "loss/gen": 0.029492512345314026, "loss/real": 0.24392205476760864, "rewards/accuracies": 1.0, "rewards/generated": -612.9607543945312, "rewards/margins": 606.7144775390625, "rewards/real": -6.246295928955078, "step": 221 }, { "epoch": 0.46, "grad_norm": 34.78987391682295, "learning_rate": 3.2377190155857864e-07, "logits/generated": 0.3520706295967102, "logits/oppo_generated": -2.6503279209136963, "logits/oppo_real": -2.805267333984375, "logits/real": -1.7184969186782837, "logps/generated": -657.2587890625, "logps/oppo_gen": -77.18238067626953, "logps/oppo_real": -260.9399108886719, "logps/real": -246.47340393066406, "loss": 0.2943, "loss/gen": 0.1653580665588379, "loss/real": 0.19109240174293518, "rewards/accuracies": 0.9375, "rewards/generated": -580.076416015625, "rewards/margins": 594.5429077148438, "rewards/real": 14.466522216796875, "step": 222 }, { "epoch": 0.47, "grad_norm": 45.26956478798697, "learning_rate": 3.220247748413094e-07, "logits/generated": 0.03710488975048065, "logits/oppo_generated": -2.5938522815704346, "logits/oppo_real": -2.520634651184082, "logits/real": -1.9206852912902832, "logps/generated": -496.4818115234375, "logps/oppo_gen": -65.14288330078125, "logps/oppo_real": -270.13726806640625, "logps/real": -247.87417602539062, "loss": 0.4573, "loss/gen": 0.1664436161518097, "loss/real": 0.16675496101379395, "rewards/accuracies": 1.0, "rewards/generated": -431.3388977050781, "rewards/margins": 453.6019592285156, "rewards/real": 22.263086318969727, "step": 223 }, { "epoch": 0.47, "grad_norm": 39.78434296695749, "learning_rate": 3.2027380359795706e-07, "logits/generated": -0.20777527987957, "logits/oppo_generated": -2.839998245239258, "logits/oppo_real": -2.971149444580078, "logits/real": -2.094723701477051, "logps/generated": -697.270263671875, "logps/oppo_gen": -77.53987121582031, "logps/oppo_real": -300.1747741699219, "logps/real": -289.88665771484375, "loss": 0.3072, "loss/gen": 0.017790913581848145, "loss/real": 0.18654951453208923, "rewards/accuracies": 1.0, "rewards/generated": -619.7303466796875, "rewards/margins": 630.0185546875, "rewards/real": 10.28813362121582, "step": 224 }, { "epoch": 0.47, "grad_norm": 38.24634090698661, "learning_rate": 3.185190812915646e-07, "logits/generated": 0.6704794764518738, "logits/oppo_generated": -2.6874566078186035, "logits/oppo_real": -2.7883381843566895, "logits/real": -1.6932792663574219, "logps/generated": -531.7930908203125, "logps/oppo_gen": -65.7908935546875, "logps/oppo_real": -203.2254638671875, "logps/real": -217.8573455810547, "loss": 0.3306, "loss/gen": 0.25845158100128174, "loss/real": 0.3562762141227722, "rewards/accuracies": 0.9375, "rewards/generated": -466.002197265625, "rewards/margins": 451.37030029296875, "rewards/real": -14.631880760192871, "step": 225 }, { "epoch": 0.47, "grad_norm": 28.64358490228986, "learning_rate": 3.167607015853983e-07, "logits/generated": 0.9228378534317017, "logits/oppo_generated": -2.804375171661377, "logits/oppo_real": -2.7193827629089355, "logits/real": -2.1706273555755615, "logps/generated": -688.7299194335938, "logps/oppo_gen": -84.42399597167969, "logps/oppo_real": -334.47344970703125, "logps/real": -320.31610107421875, "loss": 0.3595, "loss/gen": 0.07177238911390305, "loss/real": 0.17948225140571594, "rewards/accuracies": 1.0, "rewards/generated": -604.305908203125, "rewards/margins": 618.4632568359375, "rewards/real": 14.157352447509766, "step": 226 }, { "epoch": 0.47, "grad_norm": 34.51988819020222, "learning_rate": 3.149987583379485e-07, "logits/generated": 1.21856689453125, "logits/oppo_generated": -2.7994980812072754, "logits/oppo_real": -2.839235782623291, "logits/real": -2.0786094665527344, "logps/generated": -732.9527587890625, "logps/oppo_gen": -77.09896850585938, "logps/oppo_real": -191.4404296875, "logps/real": -195.81341552734375, "loss": 0.3501, "loss/gen": 0.14099054038524628, "loss/real": 0.2566527724266052, "rewards/accuracies": 0.9375, "rewards/generated": -655.8538208007812, "rewards/margins": 651.4808349609375, "rewards/real": -4.372990131378174, "step": 227 }, { "epoch": 0.48, "grad_norm": 68.5395215596447, "learning_rate": 3.1323334559792015e-07, "logits/generated": 0.6095637083053589, "logits/oppo_generated": -2.798116683959961, "logits/oppo_real": -2.8187661170959473, "logits/real": -2.233832836151123, "logps/generated": -621.18408203125, "logps/oppo_gen": -64.57658386230469, "logps/oppo_real": -321.273193359375, "logps/real": -329.33807373046875, "loss": 0.3941, "loss/gen": 0.04750348627567291, "loss/real": 0.34501129388809204, "rewards/accuracies": 0.9375, "rewards/generated": -556.607421875, "rewards/margins": 548.5426025390625, "rewards/real": -8.064876556396484, "step": 228 }, { "epoch": 0.48, "grad_norm": 28.440128012153114, "learning_rate": 3.114645575992116e-07, "logits/generated": 1.1552635431289673, "logits/oppo_generated": -2.8417534828186035, "logits/oppo_real": -2.915761947631836, "logits/real": -1.9439736604690552, "logps/generated": -694.2273559570312, "logps/oppo_gen": -83.11656188964844, "logps/oppo_real": -318.304443359375, "logps/real": -300.11590576171875, "loss": 0.308, "loss/gen": 0.08618447184562683, "loss/real": 0.2467232197523117, "rewards/accuracies": 1.0, "rewards/generated": -611.11083984375, "rewards/margins": 629.29931640625, "rewards/real": 18.188520431518555, "step": 229 }, { "epoch": 0.48, "grad_norm": 42.474537121797304, "learning_rate": 3.096924887558854e-07, "logits/generated": 0.5934816002845764, "logits/oppo_generated": -2.716702461242676, "logits/oppo_real": -2.8514609336853027, "logits/real": -1.4103012084960938, "logps/generated": -645.125244140625, "logps/oppo_gen": -69.85491943359375, "logps/oppo_real": -260.97369384765625, "logps/real": -300.82269287109375, "loss": 0.3277, "loss/gen": 0.1709042340517044, "loss/real": 0.5244534015655518, "rewards/accuracies": 0.875, "rewards/generated": -575.270263671875, "rewards/margins": 535.4212646484375, "rewards/real": -39.84899139404297, "step": 230 }, { "epoch": 0.48, "grad_norm": 37.19761384263697, "learning_rate": 3.079172336571286e-07, "logits/generated": 1.9427441358566284, "logits/oppo_generated": -2.795680522918701, "logits/oppo_real": -2.8359665870666504, "logits/real": -1.5810625553131104, "logps/generated": -670.4180908203125, "logps/oppo_gen": -72.59526062011719, "logps/oppo_real": -213.2947998046875, "logps/real": -207.31228637695312, "loss": 0.3329, "loss/gen": 0.07098525762557983, "loss/real": 0.22547532618045807, "rewards/accuracies": 1.0, "rewards/generated": -597.8228759765625, "rewards/margins": 603.805419921875, "rewards/real": 5.982503414154053, "step": 231 }, { "epoch": 0.49, "grad_norm": 27.7476288753314, "learning_rate": 3.061388870622033e-07, "logits/generated": 0.8548299670219421, "logits/oppo_generated": -2.654226303100586, "logits/oppo_real": -2.6452994346618652, "logits/real": -1.6692825555801392, "logps/generated": -665.5245361328125, "logps/oppo_gen": -83.74305725097656, "logps/oppo_real": -318.2536315917969, "logps/real": -321.6038513183594, "loss": 0.3093, "loss/gen": 0.05284074321389198, "loss/real": 0.29021862149238586, "rewards/accuracies": 1.0, "rewards/generated": -581.781494140625, "rewards/margins": 578.4312133789062, "rewards/real": -3.350205421447754, "step": 232 }, { "epoch": 0.49, "grad_norm": 36.705464684366994, "learning_rate": 3.0435754389538925e-07, "logits/generated": 1.5102429389953613, "logits/oppo_generated": -2.7181339263916016, "logits/oppo_real": -2.965839385986328, "logits/real": -1.0968390703201294, "logps/generated": -595.69189453125, "logps/oppo_gen": -67.46559143066406, "logps/oppo_real": -231.64990234375, "logps/real": -232.36026000976562, "loss": 0.3101, "loss/gen": 0.11512690782546997, "loss/real": 0.2681490182876587, "rewards/accuracies": 0.9375, "rewards/generated": -528.226318359375, "rewards/margins": 527.5159301757812, "rewards/real": -0.710362434387207, "step": 233 }, { "epoch": 0.49, "grad_norm": 25.163564057078393, "learning_rate": 3.0257329924091654e-07, "logits/generated": 2.2034385204315186, "logits/oppo_generated": -2.7173333168029785, "logits/oppo_real": -2.6980838775634766, "logits/real": -1.6483628749847412, "logps/generated": -850.8960571289062, "logps/oppo_gen": -86.72967529296875, "logps/oppo_real": -260.89862060546875, "logps/real": -256.45538330078125, "loss": 0.3215, "loss/gen": 0.03136850893497467, "loss/real": 0.23244068026542664, "rewards/accuracies": 1.0, "rewards/generated": -764.1663818359375, "rewards/margins": 768.609619140625, "rewards/real": 4.443211555480957, "step": 234 }, { "epoch": 0.49, "grad_norm": 30.334419426669847, "learning_rate": 3.007862483378906e-07, "logits/generated": 0.15867102146148682, "logits/oppo_generated": -2.742459774017334, "logits/oppo_real": -2.9399333000183105, "logits/real": -2.043656587600708, "logps/generated": -684.0328369140625, "logps/oppo_gen": -86.16322326660156, "logps/oppo_real": -312.35125732421875, "logps/real": -311.46185302734375, "loss": 0.2987, "loss/gen": 0.050678517669439316, "loss/real": 0.25761735439300537, "rewards/accuracies": 1.0, "rewards/generated": -597.86962890625, "rewards/margins": 598.759033203125, "rewards/real": 0.8893804550170898, "step": 235 }, { "epoch": 0.49, "grad_norm": 25.297935383504495, "learning_rate": 2.989964865752079e-07, "logits/generated": 1.1529502868652344, "logits/oppo_generated": -2.7969439029693604, "logits/oppo_real": -2.782660484313965, "logits/real": -2.0899152755737305, "logps/generated": -697.8406982421875, "logps/oppo_gen": -88.234375, "logps/oppo_real": -255.04251098632812, "logps/real": -238.53860473632812, "loss": 0.3265, "loss/gen": 0.04399724677205086, "loss/real": 0.2120170146226883, "rewards/accuracies": 1.0, "rewards/generated": -609.6063232421875, "rewards/margins": 626.1102294921875, "rewards/real": 16.50393295288086, "step": 236 }, { "epoch": 0.5, "grad_norm": 31.417229307551633, "learning_rate": 2.97204109486465e-07, "logits/generated": 1.7922279834747314, "logits/oppo_generated": -2.7455062866210938, "logits/oppo_real": -2.7361059188842773, "logits/real": -1.599406361579895, "logps/generated": -667.8589477539062, "logps/oppo_gen": -81.91145324707031, "logps/oppo_real": -264.510498046875, "logps/real": -259.50238037109375, "loss": 0.3382, "loss/gen": 0.08875064551830292, "loss/real": 0.21604704856872559, "rewards/accuracies": 1.0, "rewards/generated": -585.9474487304688, "rewards/margins": 590.95556640625, "rewards/real": 5.00810432434082, "step": 237 }, { "epoch": 0.5, "grad_norm": 36.81499852248995, "learning_rate": 2.954092127448591e-07, "logits/generated": 0.43488985300064087, "logits/oppo_generated": -2.644202709197998, "logits/oppo_real": -2.7387442588806152, "logits/real": -1.5985989570617676, "logps/generated": -613.1663818359375, "logps/oppo_gen": -65.2353744506836, "logps/oppo_real": -209.83143615722656, "logps/real": -235.2373046875, "loss": 0.3054, "loss/gen": 0.07669935375452042, "loss/real": 0.38457173109054565, "rewards/accuracies": 1.0, "rewards/generated": -547.9310302734375, "rewards/margins": 522.525146484375, "rewards/real": -25.405866622924805, "step": 238 }, { "epoch": 0.5, "grad_norm": 68.19579535357184, "learning_rate": 2.9361189215808057e-07, "logits/generated": 2.575610399246216, "logits/oppo_generated": -2.8439998626708984, "logits/oppo_real": -2.853848934173584, "logits/real": -1.4824180603027344, "logps/generated": -647.5440063476562, "logps/oppo_gen": -75.34915161132812, "logps/oppo_real": -284.36083984375, "logps/real": -330.95611572265625, "loss": 0.3659, "loss/gen": 0.05739718675613403, "loss/real": 0.6472922563552856, "rewards/accuracies": 0.9375, "rewards/generated": -572.1948852539062, "rewards/margins": 525.599609375, "rewards/real": -46.595272064208984, "step": 239 }, { "epoch": 0.5, "grad_norm": 47.93619874782131, "learning_rate": 2.9181224366319943e-07, "logits/generated": 0.38327261805534363, "logits/oppo_generated": -2.768341064453125, "logits/oppo_real": -2.7443935871124268, "logits/real": -1.9321752786636353, "logps/generated": -622.6148681640625, "logps/oppo_gen": -68.6933822631836, "logps/oppo_real": -232.75717163085938, "logps/real": -214.53204345703125, "loss": 0.3302, "loss/gen": 0.0978286862373352, "loss/real": 0.20635342597961426, "rewards/accuracies": 1.0, "rewards/generated": -553.9214477539062, "rewards/margins": 572.1466064453125, "rewards/real": 18.225135803222656, "step": 240 }, { "epoch": 0.5, "grad_norm": 30.52824219927089, "learning_rate": 2.900103633215447e-07, "logits/generated": 0.9182009100914001, "logits/oppo_generated": -2.757927417755127, "logits/oppo_real": -2.7806620597839355, "logits/real": -1.9723321199417114, "logps/generated": -639.5178833007812, "logps/oppo_gen": -86.59483337402344, "logps/oppo_real": -236.5889434814453, "logps/real": -256.336181640625, "loss": 0.2817, "loss/gen": 0.08502039313316345, "loss/real": 0.28419214487075806, "rewards/accuracies": 0.9375, "rewards/generated": -552.9230346679688, "rewards/margins": 533.1757202148438, "rewards/real": -19.747272491455078, "step": 241 }, { "epoch": 0.51, "grad_norm": 39.09951839687015, "learning_rate": 2.882063473135763e-07, "logits/generated": 1.0408952236175537, "logits/oppo_generated": -2.827584743499756, "logits/oppo_real": -2.9590084552764893, "logits/real": -2.2290215492248535, "logps/generated": -623.004638671875, "logps/oppo_gen": -68.2485580444336, "logps/oppo_real": -362.74945068359375, "logps/real": -344.467529296875, "loss": 0.3063, "loss/gen": 0.09477907419204712, "loss/real": 0.18900375068187714, "rewards/accuracies": 1.0, "rewards/generated": -554.756103515625, "rewards/margins": 573.0380859375, "rewards/real": 18.281963348388672, "step": 242 }, { "epoch": 0.51, "grad_norm": 45.05031908877006, "learning_rate": 2.864002919337512e-07, "logits/generated": 0.8644614219665527, "logits/oppo_generated": -2.783228874206543, "logits/oppo_real": -2.742513656616211, "logits/real": -1.9242509603500366, "logps/generated": -637.9912719726562, "logps/oppo_gen": -65.0790786743164, "logps/oppo_real": -264.8369445800781, "logps/real": -255.57125854492188, "loss": 0.3486, "loss/gen": 0.12847480177879333, "loss/real": 0.24175365269184113, "rewards/accuracies": 1.0, "rewards/generated": -572.9121704101562, "rewards/margins": 582.1778564453125, "rewards/real": 9.265676498413086, "step": 243 }, { "epoch": 0.51, "grad_norm": 36.0462298788176, "learning_rate": 2.8459229358538404e-07, "logits/generated": -0.3217124342918396, "logits/oppo_generated": -2.7591960430145264, "logits/oppo_real": -2.847045660018921, "logits/real": -2.0825626850128174, "logps/generated": -639.609375, "logps/oppo_gen": -79.41316986083984, "logps/oppo_real": -322.44171142578125, "logps/real": -304.0206298828125, "loss": 0.2817, "loss/gen": 0.08697028458118439, "loss/real": 0.18020153045654297, "rewards/accuracies": 1.0, "rewards/generated": -560.1961669921875, "rewards/margins": 578.6173706054688, "rewards/real": 18.421127319335938, "step": 244 }, { "epoch": 0.51, "grad_norm": 23.098431475342515, "learning_rate": 2.827824487755007e-07, "logits/generated": 0.6684847474098206, "logits/oppo_generated": -3.039127826690674, "logits/oppo_real": -2.9222187995910645, "logits/real": -2.522883892059326, "logps/generated": -825.6904296875, "logps/oppo_gen": -92.08659362792969, "logps/oppo_real": -440.71002197265625, "logps/real": -405.4791259765625, "loss": 0.252, "loss/gen": 0.0202939473092556, "loss/real": 0.14877164363861084, "rewards/accuracies": 1.0, "rewards/generated": -733.6038208007812, "rewards/margins": 768.834716796875, "rewards/real": 35.23091506958008, "step": 245 }, { "epoch": 0.51, "grad_norm": 32.593987819722024, "learning_rate": 2.8097085410968694e-07, "logits/generated": 0.48746663331985474, "logits/oppo_generated": -2.485867977142334, "logits/oppo_real": -2.4769599437713623, "logits/real": -1.8404998779296875, "logps/generated": -625.856201171875, "logps/oppo_gen": -90.03643798828125, "logps/oppo_real": -255.79519653320312, "logps/real": -247.8499298095703, "loss": 0.4168, "loss/gen": 0.13854430615901947, "loss/real": 0.21315725147724152, "rewards/accuracies": 1.0, "rewards/generated": -535.81982421875, "rewards/margins": 543.7650756835938, "rewards/real": 7.945267677307129, "step": 246 }, { "epoch": 0.52, "grad_norm": 36.158667270102754, "learning_rate": 2.7915760628693253e-07, "logits/generated": 1.4546080827713013, "logits/oppo_generated": -2.7927658557891846, "logits/oppo_real": -2.680619478225708, "logits/real": -2.005889415740967, "logps/generated": -741.1942138671875, "logps/oppo_gen": -81.70547485351562, "logps/oppo_real": -251.9884033203125, "logps/real": -246.9957733154297, "loss": 0.2965, "loss/gen": 0.0464542955160141, "loss/real": 0.23268568515777588, "rewards/accuracies": 1.0, "rewards/generated": -659.48876953125, "rewards/margins": 664.4813842773438, "rewards/real": 4.99260139465332, "step": 247 }, { "epoch": 0.52, "grad_norm": 46.582294450252235, "learning_rate": 2.7734280209446865e-07, "logits/generated": 2.0774784088134766, "logits/oppo_generated": -2.685457229614258, "logits/oppo_real": -2.7742578983306885, "logits/real": -1.0988413095474243, "logps/generated": -716.355224609375, "logps/oppo_gen": -73.68305206298828, "logps/oppo_real": -247.6187744140625, "logps/real": -261.7083435058594, "loss": 0.3984, "loss/gen": 0.022194834426045418, "loss/real": 0.318002313375473, "rewards/accuracies": 1.0, "rewards/generated": -642.6722412109375, "rewards/margins": 628.5826416015625, "rewards/real": -14.089559555053711, "step": 248 }, { "epoch": 0.52, "grad_norm": 39.37642916553157, "learning_rate": 2.755265384026023e-07, "logits/generated": 2.5619468688964844, "logits/oppo_generated": -2.7931642532348633, "logits/oppo_real": -2.6713364124298096, "logits/real": -1.2989791631698608, "logps/generated": -589.9859619140625, "logps/oppo_gen": -74.50320434570312, "logps/oppo_real": -232.06639099121094, "logps/real": -266.68707275390625, "loss": 0.343, "loss/gen": 0.11266843974590302, "loss/real": 0.4674380123615265, "rewards/accuracies": 0.9375, "rewards/generated": -515.4827880859375, "rewards/margins": 480.86212158203125, "rewards/real": -34.620670318603516, "step": 249 }, { "epoch": 0.52, "grad_norm": 30.31126366679761, "learning_rate": 2.7370891215954565e-07, "logits/generated": -0.13464397192001343, "logits/oppo_generated": -2.922173500061035, "logits/oppo_real": -2.915562391281128, "logits/real": -1.2431890964508057, "logps/generated": -568.3289184570312, "logps/oppo_gen": -93.9364013671875, "logps/oppo_real": -277.7060241699219, "logps/real": -319.4127197265625, "loss": 0.3484, "loss/gen": 0.1920367181301117, "loss/real": 0.4756568372249603, "rewards/accuracies": 0.8125, "rewards/generated": -474.39251708984375, "rewards/margins": 432.685791015625, "rewards/real": -41.70672607421875, "step": 250 }, { "epoch": 0.53, "grad_norm": 32.90219882469863, "learning_rate": 2.7189002038624057e-07, "logits/generated": 0.6835288405418396, "logits/oppo_generated": -2.8348021507263184, "logits/oppo_real": -2.6828556060791016, "logits/real": -1.7274892330169678, "logps/generated": -670.5845336914062, "logps/oppo_gen": -77.95462036132812, "logps/oppo_real": -321.50152587890625, "logps/real": -326.0620422363281, "loss": 0.2903, "loss/gen": 0.0680694729089737, "loss/real": 0.2676253318786621, "rewards/accuracies": 1.0, "rewards/generated": -592.6298828125, "rewards/margins": 588.0693969726562, "rewards/real": -4.560503959655762, "step": 251 }, { "epoch": 0.53, "grad_norm": 30.531534871048184, "learning_rate": 2.7006996017118027e-07, "logits/generated": 1.0771749019622803, "logits/oppo_generated": -2.705852508544922, "logits/oppo_real": -2.7847092151641846, "logits/real": -1.856350064277649, "logps/generated": -580.4610595703125, "logps/oppo_gen": -66.31861877441406, "logps/oppo_real": -250.60986328125, "logps/real": -234.23973083496094, "loss": 0.2796, "loss/gen": 0.09471721947193146, "loss/real": 0.18563194572925568, "rewards/accuracies": 1.0, "rewards/generated": -514.1424560546875, "rewards/margins": 530.5125732421875, "rewards/real": 16.370126724243164, "step": 252 }, { "epoch": 0.53, "grad_norm": 25.42876102382952, "learning_rate": 2.682488286652269e-07, "logits/generated": 1.3248242139816284, "logits/oppo_generated": -2.7483713626861572, "logits/oppo_real": -2.829575538635254, "logits/real": -1.816493034362793, "logps/generated": -644.23974609375, "logps/oppo_gen": -80.60096740722656, "logps/oppo_real": -285.4813232421875, "logps/real": -277.68682861328125, "loss": 0.3086, "loss/gen": 0.06294162571430206, "loss/real": 0.2093324214220047, "rewards/accuracies": 1.0, "rewards/generated": -563.6387939453125, "rewards/margins": 571.4332275390625, "rewards/real": 7.794487953186035, "step": 253 }, { "epoch": 0.53, "grad_norm": 21.81948661948513, "learning_rate": 2.6642672307642573e-07, "logits/generated": 0.9607592821121216, "logits/oppo_generated": -2.632258653640747, "logits/oppo_real": -2.55385160446167, "logits/real": -2.0827624797821045, "logps/generated": -574.7434692382812, "logps/oppo_gen": -69.48466491699219, "logps/oppo_real": -261.5279235839844, "logps/real": -236.08151245117188, "loss": 0.3025, "loss/gen": 0.11658591777086258, "loss/real": 0.16278226673603058, "rewards/accuracies": 1.0, "rewards/generated": -505.25885009765625, "rewards/margins": 530.7052612304688, "rewards/real": 25.446413040161133, "step": 254 }, { "epoch": 0.53, "grad_norm": 34.6780286900528, "learning_rate": 2.646037406648165e-07, "logits/generated": 0.40537479519844055, "logits/oppo_generated": -2.7576169967651367, "logits/oppo_real": -2.7069432735443115, "logits/real": -2.0863590240478516, "logps/generated": -679.2177734375, "logps/oppo_gen": -119.10911560058594, "logps/oppo_real": -359.0958557128906, "logps/real": -355.48931884765625, "loss": 0.4468, "loss/gen": 0.06933214515447617, "loss/real": 0.28164565563201904, "rewards/accuracies": 1.0, "rewards/generated": -560.108642578125, "rewards/margins": 563.7152099609375, "rewards/real": 3.6065492630004883, "step": 255 }, { "epoch": 0.54, "grad_norm": 23.262374374157027, "learning_rate": 2.6277997873724176e-07, "logits/generated": 2.0152463912963867, "logits/oppo_generated": -2.6790993213653564, "logits/oppo_real": -2.67695951461792, "logits/real": -1.8701472282409668, "logps/generated": -630.0718383789062, "logps/oppo_gen": -81.55001831054688, "logps/oppo_real": -244.86224365234375, "logps/real": -256.7394104003906, "loss": 0.3128, "loss/gen": 0.04411861300468445, "loss/real": 0.38423439860343933, "rewards/accuracies": 1.0, "rewards/generated": -548.5217895507812, "rewards/margins": 536.6446533203125, "rewards/real": -11.87718391418457, "step": 256 }, { "epoch": 0.54, "grad_norm": 26.47014389961653, "learning_rate": 2.609555346421532e-07, "logits/generated": 0.46947139501571655, "logits/oppo_generated": -2.7998907566070557, "logits/oppo_real": -2.928711414337158, "logits/real": -1.9949252605438232, "logps/generated": -624.2977294921875, "logps/oppo_gen": -78.38945007324219, "logps/oppo_real": -242.44906616210938, "logps/real": -243.96652221679688, "loss": 0.3466, "loss/gen": 0.06059020385146141, "loss/real": 0.25872138142585754, "rewards/accuracies": 0.9375, "rewards/generated": -545.9083251953125, "rewards/margins": 544.3908081054688, "rewards/real": -1.5174579620361328, "step": 257 }, { "epoch": 0.54, "grad_norm": 44.01876569598968, "learning_rate": 2.5913050576441473e-07, "logits/generated": 0.2901713252067566, "logits/oppo_generated": -2.3973050117492676, "logits/oppo_real": -2.54941463470459, "logits/real": -1.5007007122039795, "logps/generated": -672.2840576171875, "logps/oppo_gen": -78.8645248413086, "logps/oppo_real": -219.2704620361328, "logps/real": -222.47265625, "loss": 0.3215, "loss/gen": 0.05777715891599655, "loss/real": 0.27797359228134155, "rewards/accuracies": 1.0, "rewards/generated": -593.4195556640625, "rewards/margins": 590.2173461914062, "rewards/real": -3.202197551727295, "step": 258 }, { "epoch": 0.54, "grad_norm": 27.867143695648217, "learning_rate": 2.5730498952010496e-07, "logits/generated": 1.1024194955825806, "logits/oppo_generated": -2.8151259422302246, "logits/oppo_real": -2.8964319229125977, "logits/real": -2.0106348991394043, "logps/generated": -623.4109497070312, "logps/oppo_gen": -62.03562927246094, "logps/oppo_real": -338.70721435546875, "logps/real": -356.89959716796875, "loss": 0.3348, "loss/gen": 0.13135670125484467, "loss/real": 0.4308997392654419, "rewards/accuracies": 1.0, "rewards/generated": -561.3753051757812, "rewards/margins": 543.1829223632812, "rewards/real": -18.192373275756836, "step": 259 }, { "epoch": 0.54, "grad_norm": 43.278825800792156, "learning_rate": 2.55479083351317e-07, "logits/generated": 0.6705357432365417, "logits/oppo_generated": -2.8476147651672363, "logits/oppo_real": -3.038517713546753, "logits/real": -2.0954232215881348, "logps/generated": -606.3067626953125, "logps/oppo_gen": -65.98280334472656, "logps/oppo_real": -297.4833984375, "logps/real": -295.8573913574219, "loss": 0.347, "loss/gen": 0.13984668254852295, "loss/real": 0.2379724085330963, "rewards/accuracies": 0.9375, "rewards/generated": -540.323974609375, "rewards/margins": 541.9500122070312, "rewards/real": 1.6260404586791992, "step": 260 }, { "epoch": 0.55, "grad_norm": 33.369751020993924, "learning_rate": 2.536528847209573e-07, "logits/generated": 0.027688533067703247, "logits/oppo_generated": -2.5906708240509033, "logits/oppo_real": -2.67025089263916, "logits/real": -1.7617822885513306, "logps/generated": -647.2080078125, "logps/oppo_gen": -73.23574829101562, "logps/oppo_real": -239.16107177734375, "logps/real": -228.57687377929688, "loss": 0.3379, "loss/gen": 0.24388740956783295, "loss/real": 0.1990511417388916, "rewards/accuracies": 0.9375, "rewards/generated": -573.9722290039062, "rewards/margins": 584.556396484375, "rewards/real": 10.584165573120117, "step": 261 }, { "epoch": 0.55, "grad_norm": 47.93150019493965, "learning_rate": 2.5182649110754325e-07, "logits/generated": 0.09529060125350952, "logits/oppo_generated": -2.763000011444092, "logits/oppo_real": -2.8288025856018066, "logits/real": -2.132340431213379, "logps/generated": -620.471435546875, "logps/oppo_gen": -84.12704467773438, "logps/oppo_real": -251.29183959960938, "logps/real": -241.84896850585938, "loss": 0.3577, "loss/gen": 0.2290661633014679, "loss/real": 0.21368204057216644, "rewards/accuracies": 1.0, "rewards/generated": -536.3443603515625, "rewards/margins": 545.7872314453125, "rewards/real": 9.442842483520508, "step": 262 }, { "epoch": 0.55, "grad_norm": 75.39146534505745, "learning_rate": 2.5e-07, "logits/generated": -0.36547717452049255, "logits/oppo_generated": -2.7416415214538574, "logits/oppo_real": -2.677898645401001, "logits/real": -2.1685986518859863, "logps/generated": -853.912841796875, "logps/oppo_gen": -94.32908630371094, "logps/oppo_real": -302.0242614746094, "logps/real": -274.1735534667969, "loss": 0.3866, "loss/gen": 0.07128161191940308, "loss/real": 0.1598164141178131, "rewards/accuracies": 1.0, "rewards/generated": -759.583740234375, "rewards/margins": 787.4345092773438, "rewards/real": 27.850727081298828, "step": 263 }, { "epoch": 0.55, "grad_norm": 44.66155824818755, "learning_rate": 2.4817350889245673e-07, "logits/generated": -0.10230934619903564, "logits/oppo_generated": -2.799670696258545, "logits/oppo_real": -2.7438480854034424, "logits/real": -2.254028081893921, "logps/generated": -686.68310546875, "logps/oppo_gen": -80.21273803710938, "logps/oppo_real": -327.29644775390625, "logps/real": -294.9067077636719, "loss": 0.391, "loss/gen": 0.07902692258358002, "loss/real": 0.15174683928489685, "rewards/accuracies": 1.0, "rewards/generated": -606.4703369140625, "rewards/margins": 638.8599853515625, "rewards/real": 32.38971710205078, "step": 264 }, { "epoch": 0.55, "grad_norm": 18.82427994970857, "learning_rate": 2.463471152790427e-07, "logits/generated": 0.20839297771453857, "logits/oppo_generated": -2.7984328269958496, "logits/oppo_real": -2.7523856163024902, "logits/real": -2.133162498474121, "logps/generated": -691.3712768554688, "logps/oppo_gen": -82.20599365234375, "logps/oppo_real": -347.56866455078125, "logps/real": -320.5689697265625, "loss": 0.2817, "loss/gen": 0.15159496665000916, "loss/real": 0.16067659854888916, "rewards/accuracies": 0.9375, "rewards/generated": -609.165283203125, "rewards/margins": 636.1649169921875, "rewards/real": 26.999671936035156, "step": 265 }, { "epoch": 0.56, "grad_norm": 75.24201512349167, "learning_rate": 2.44520916648683e-07, "logits/generated": -0.19325079023838043, "logits/oppo_generated": -2.7985243797302246, "logits/oppo_real": -2.8215994834899902, "logits/real": -2.207157611846924, "logps/generated": -727.372802734375, "logps/oppo_gen": -86.37100219726562, "logps/oppo_real": -314.001708984375, "logps/real": -294.5711975097656, "loss": 0.4154, "loss/gen": 0.032481130212545395, "loss/real": 0.22301779687404633, "rewards/accuracies": 1.0, "rewards/generated": -641.0017700195312, "rewards/margins": 660.4323120117188, "rewards/real": 19.43051528930664, "step": 266 }, { "epoch": 0.56, "grad_norm": 27.517901887978418, "learning_rate": 2.426950104798951e-07, "logits/generated": 0.02050509676337242, "logits/oppo_generated": -2.809068202972412, "logits/oppo_real": -2.782775402069092, "logits/real": -2.2482824325561523, "logps/generated": -662.8475341796875, "logps/oppo_gen": -91.16156005859375, "logps/oppo_real": -328.0069580078125, "logps/real": -295.89288330078125, "loss": 0.276, "loss/gen": 0.0662633553147316, "loss/real": 0.15361186861991882, "rewards/accuracies": 1.0, "rewards/generated": -571.6859741210938, "rewards/margins": 603.800048828125, "rewards/real": 32.11410140991211, "step": 267 }, { "epoch": 0.56, "grad_norm": 58.34852643477276, "learning_rate": 2.4086949423558525e-07, "logits/generated": 0.4598902761936188, "logits/oppo_generated": -2.640536308288574, "logits/oppo_real": -2.6269845962524414, "logits/real": -1.9713550806045532, "logps/generated": -609.9164428710938, "logps/oppo_gen": -77.98777770996094, "logps/oppo_real": -400.0408935546875, "logps/real": -374.6983642578125, "loss": 0.3319, "loss/gen": 0.17593906819820404, "loss/real": 0.16186195611953735, "rewards/accuracies": 1.0, "rewards/generated": -531.9286499023438, "rewards/margins": 557.2711791992188, "rewards/real": 25.342538833618164, "step": 268 }, { "epoch": 0.56, "grad_norm": 43.833663030237815, "learning_rate": 2.3904446535784686e-07, "logits/generated": -0.2348947525024414, "logits/oppo_generated": -2.701582431793213, "logits/oppo_real": -2.8296256065368652, "logits/real": -2.0177409648895264, "logps/generated": -653.5576171875, "logps/oppo_gen": -91.32444763183594, "logps/oppo_real": -384.9336853027344, "logps/real": -351.2134094238281, "loss": 0.3148, "loss/gen": 0.1337149441242218, "loss/real": 0.15142256021499634, "rewards/accuracies": 1.0, "rewards/generated": -562.233154296875, "rewards/margins": 595.9534912109375, "rewards/real": 33.720272064208984, "step": 269 }, { "epoch": 0.56, "grad_norm": 29.247056601036263, "learning_rate": 2.3722002126275822e-07, "logits/generated": -0.4930816888809204, "logits/oppo_generated": -2.8453965187072754, "logits/oppo_real": -2.7912707328796387, "logits/real": -2.370246410369873, "logps/generated": -680.9036865234375, "logps/oppo_gen": -103.50727844238281, "logps/oppo_real": -434.1189270019531, "logps/real": -401.18719482421875, "loss": 0.3351, "loss/gen": 0.15950866043567657, "loss/real": 0.15047958493232727, "rewards/accuracies": 1.0, "rewards/generated": -577.3963623046875, "rewards/margins": 610.328125, "rewards/real": 32.93174362182617, "step": 270 }, { "epoch": 0.57, "grad_norm": 29.402288480169258, "learning_rate": 2.353962593351835e-07, "logits/generated": 1.3464157581329346, "logits/oppo_generated": -2.815722942352295, "logits/oppo_real": -2.8115828037261963, "logits/real": -1.9343657493591309, "logps/generated": -742.6785888671875, "logps/oppo_gen": -90.67955780029297, "logps/oppo_real": -319.91876220703125, "logps/real": -318.8854064941406, "loss": 0.31, "loss/gen": 0.030156534165143967, "loss/real": 0.26410186290740967, "rewards/accuracies": 1.0, "rewards/generated": -651.9990234375, "rewards/margins": 653.032470703125, "rewards/real": 1.033339500427246, "step": 271 }, { "epoch": 0.57, "grad_norm": 32.55452345021061, "learning_rate": 2.3357327692357428e-07, "logits/generated": 1.7179875373840332, "logits/oppo_generated": -2.6796679496765137, "logits/oppo_real": -2.7324979305267334, "logits/real": -1.4703872203826904, "logps/generated": -692.619384765625, "logps/oppo_gen": -64.20418548583984, "logps/oppo_real": -228.74159240722656, "logps/real": -244.6279296875, "loss": 0.3772, "loss/gen": 0.03423958644270897, "loss/real": 0.4124412536621094, "rewards/accuracies": 1.0, "rewards/generated": -628.4152221679688, "rewards/margins": 612.5289306640625, "rewards/real": -15.886341094970703, "step": 272 }, { "epoch": 0.57, "grad_norm": 27.68333221135452, "learning_rate": 2.317511713347731e-07, "logits/generated": -0.3925975561141968, "logits/oppo_generated": -2.623629093170166, "logits/oppo_real": -2.891550064086914, "logits/real": -1.9939806461334229, "logps/generated": -675.5734252929688, "logps/oppo_gen": -77.46165466308594, "logps/oppo_real": -376.2138366699219, "logps/real": -352.5907897949219, "loss": 0.3106, "loss/gen": 0.11591622233390808, "loss/real": 0.16552887856960297, "rewards/accuracies": 1.0, "rewards/generated": -598.11181640625, "rewards/margins": 621.7348022460938, "rewards/real": 23.623062133789062, "step": 273 }, { "epoch": 0.57, "grad_norm": 64.98134122216354, "learning_rate": 2.2993003982881973e-07, "logits/generated": 0.04800446331501007, "logits/oppo_generated": -2.7032546997070312, "logits/oppo_real": -2.7032618522644043, "logits/real": -1.8638030290603638, "logps/generated": -693.61572265625, "logps/oppo_gen": -91.1432876586914, "logps/oppo_real": -297.7861633300781, "logps/real": -308.7113037109375, "loss": 0.3949, "loss/gen": 0.04603281617164612, "loss/real": 0.28950807452201843, "rewards/accuracies": 1.0, "rewards/generated": -602.472412109375, "rewards/margins": 591.5473022460938, "rewards/real": -10.925130844116211, "step": 274 }, { "epoch": 0.58, "grad_norm": 39.85445015635801, "learning_rate": 2.2810997961375938e-07, "logits/generated": 2.8605520725250244, "logits/oppo_generated": -2.9404988288879395, "logits/oppo_real": -2.5591325759887695, "logits/real": -1.763561725616455, "logps/generated": -583.0770263671875, "logps/oppo_gen": -56.471839904785156, "logps/oppo_real": -241.88677978515625, "logps/real": -262.81781005859375, "loss": 0.3835, "loss/gen": 0.06709041446447372, "loss/real": 0.4117059111595154, "rewards/accuracies": 1.0, "rewards/generated": -526.605224609375, "rewards/margins": 505.6741943359375, "rewards/real": -20.931013107299805, "step": 275 }, { "epoch": 0.58, "grad_norm": 26.09332545500441, "learning_rate": 2.2629108784045436e-07, "logits/generated": 0.5841866731643677, "logits/oppo_generated": -2.6106820106506348, "logits/oppo_real": -2.7232418060302734, "logits/real": -1.913360834121704, "logps/generated": -659.1812744140625, "logps/oppo_gen": -76.83047485351562, "logps/oppo_real": -281.0586242675781, "logps/real": -261.4032287597656, "loss": 0.2957, "loss/gen": 0.025033961981534958, "loss/real": 0.1760440468788147, "rewards/accuracies": 1.0, "rewards/generated": -582.3507690429688, "rewards/margins": 602.0061645507812, "rewards/real": 19.655406951904297, "step": 276 }, { "epoch": 0.58, "grad_norm": 38.50617594539219, "learning_rate": 2.2447346159739768e-07, "logits/generated": -0.026605768129229546, "logits/oppo_generated": -2.732983112335205, "logits/oppo_real": -2.733703136444092, "logits/real": -2.2098536491394043, "logps/generated": -640.650634765625, "logps/oppo_gen": -102.6632080078125, "logps/oppo_real": -385.54461669921875, "logps/real": -364.49481201171875, "loss": 0.36, "loss/gen": 0.17120496928691864, "loss/real": 0.178038090467453, "rewards/accuracies": 1.0, "rewards/generated": -537.9873657226562, "rewards/margins": 559.0371704101562, "rewards/real": 21.049800872802734, "step": 277 }, { "epoch": 0.58, "grad_norm": 30.374145998154212, "learning_rate": 2.2265719790553146e-07, "logits/generated": 0.42899081110954285, "logits/oppo_generated": -2.741022825241089, "logits/oppo_real": -2.7121999263763428, "logits/real": -2.1624093055725098, "logps/generated": -740.4405517578125, "logps/oppo_gen": -90.87716674804688, "logps/oppo_real": -406.1678161621094, "logps/real": -392.99517822265625, "loss": 0.3204, "loss/gen": 0.020876560360193253, "loss/real": 0.19476071000099182, "rewards/accuracies": 1.0, "rewards/generated": -649.5633544921875, "rewards/margins": 662.7359619140625, "rewards/real": 13.172629356384277, "step": 278 }, { "epoch": 0.58, "grad_norm": 31.19044129103799, "learning_rate": 2.2084239371306752e-07, "logits/generated": 1.6656911373138428, "logits/oppo_generated": -2.8956916332244873, "logits/oppo_real": -2.856825351715088, "logits/real": -2.215172052383423, "logps/generated": -657.0090942382812, "logps/oppo_gen": -64.58488464355469, "logps/oppo_real": -211.9491424560547, "logps/real": -219.18777465820312, "loss": 0.3131, "loss/gen": 0.03627927601337433, "loss/real": 0.4412737786769867, "rewards/accuracies": 0.9375, "rewards/generated": -592.4242553710938, "rewards/margins": 585.1856689453125, "rewards/real": -7.238637924194336, "step": 279 }, { "epoch": 0.59, "grad_norm": 31.520425622845142, "learning_rate": 2.19029145890313e-07, "logits/generated": 0.3855173587799072, "logits/oppo_generated": -2.7485857009887695, "logits/oppo_real": -2.783714771270752, "logits/real": -2.1652963161468506, "logps/generated": -678.6658935546875, "logps/oppo_gen": -70.05374145507812, "logps/oppo_real": -304.0491638183594, "logps/real": -283.9021911621094, "loss": 0.3017, "loss/gen": 0.03154566138982773, "loss/real": 0.17599177360534668, "rewards/accuracies": 1.0, "rewards/generated": -608.6121826171875, "rewards/margins": 628.7591552734375, "rewards/real": 20.14695930480957, "step": 280 }, { "epoch": 0.59, "grad_norm": 35.63404319158419, "learning_rate": 2.172175512244993e-07, "logits/generated": 0.7677186727523804, "logits/oppo_generated": -2.8187942504882812, "logits/oppo_real": -2.884047508239746, "logits/real": -2.267946243286133, "logps/generated": -705.670654296875, "logps/oppo_gen": -83.02871704101562, "logps/oppo_real": -363.13525390625, "logps/real": -324.23687744140625, "loss": 0.277, "loss/gen": 0.04462321102619171, "loss/real": 0.14266663789749146, "rewards/accuracies": 1.0, "rewards/generated": -622.6419067382812, "rewards/margins": 661.540283203125, "rewards/real": 38.898380279541016, "step": 281 }, { "epoch": 0.59, "grad_norm": 44.425770087321645, "learning_rate": 2.154077064146159e-07, "logits/generated": -0.42760705947875977, "logits/oppo_generated": -2.8119869232177734, "logits/oppo_real": -3.1372385025024414, "logits/real": -2.188045024871826, "logps/generated": -667.8062744140625, "logps/oppo_gen": -74.51792907714844, "logps/oppo_real": -328.4604187011719, "logps/real": -306.0193786621094, "loss": 0.3065, "loss/gen": 0.048910096287727356, "loss/real": 0.18809708952903748, "rewards/accuracies": 1.0, "rewards/generated": -593.2882690429688, "rewards/margins": 615.7293701171875, "rewards/real": 22.441036224365234, "step": 282 }, { "epoch": 0.59, "grad_norm": 24.95744791726447, "learning_rate": 2.1359970806624884e-07, "logits/generated": 0.22146156430244446, "logits/oppo_generated": -2.6513538360595703, "logits/oppo_real": -2.6379599571228027, "logits/real": -1.8909337520599365, "logps/generated": -693.6986694335938, "logps/oppo_gen": -84.63557434082031, "logps/oppo_real": -237.87828063964844, "logps/real": -236.34927368164062, "loss": 0.3319, "loss/gen": 0.09527064859867096, "loss/real": 0.2507627606391907, "rewards/accuracies": 1.0, "rewards/generated": -609.0631103515625, "rewards/margins": 610.5921020507812, "rewards/real": 1.529006004333496, "step": 283 }, { "epoch": 0.59, "grad_norm": 28.732460045285126, "learning_rate": 2.1179365268642375e-07, "logits/generated": 0.1767929643392563, "logits/oppo_generated": -2.7945454120635986, "logits/oppo_real": -2.902392864227295, "logits/real": -2.1974804401397705, "logps/generated": -644.111572265625, "logps/oppo_gen": -74.51861572265625, "logps/oppo_real": -385.30194091796875, "logps/real": -357.007080078125, "loss": 0.3031, "loss/gen": 0.05500415712594986, "loss/real": 0.16526341438293457, "rewards/accuracies": 1.0, "rewards/generated": -569.5928955078125, "rewards/margins": 597.8878173828125, "rewards/real": 28.294912338256836, "step": 284 }, { "epoch": 0.6, "grad_norm": 23.633269623678157, "learning_rate": 2.0998963667845536e-07, "logits/generated": -0.31503739953041077, "logits/oppo_generated": -2.682985782623291, "logits/oppo_real": -2.8302135467529297, "logits/real": -2.067976951599121, "logps/generated": -667.347900390625, "logps/oppo_gen": -108.3177261352539, "logps/oppo_real": -433.48614501953125, "logps/real": -399.8238525390625, "loss": 0.3811, "loss/gen": 0.1597834676504135, "loss/real": 0.1646268665790558, "rewards/accuracies": 1.0, "rewards/generated": -559.0302124023438, "rewards/margins": 592.6925048828125, "rewards/real": 33.66226577758789, "step": 285 }, { "epoch": 0.6, "grad_norm": 34.15444156923445, "learning_rate": 2.0818775633680055e-07, "logits/generated": -0.049648359417915344, "logits/oppo_generated": -2.8504347801208496, "logits/oppo_real": -2.82558536529541, "logits/real": -2.302478790283203, "logps/generated": -782.4987182617188, "logps/oppo_gen": -89.17402648925781, "logps/oppo_real": -381.289794921875, "logps/real": -370.0972900390625, "loss": 0.323, "loss/gen": 0.004926434252411127, "loss/real": 0.2192765325307846, "rewards/accuracies": 1.0, "rewards/generated": -693.32470703125, "rewards/margins": 704.5172119140625, "rewards/real": 11.192514419555664, "step": 286 }, { "epoch": 0.6, "grad_norm": 34.53213198817063, "learning_rate": 2.0638810784191946e-07, "logits/generated": 0.74876868724823, "logits/oppo_generated": -2.820481777191162, "logits/oppo_real": -2.9427828788757324, "logits/real": -2.221226215362549, "logps/generated": -807.7759399414062, "logps/oppo_gen": -93.68537902832031, "logps/oppo_real": -462.7519836425781, "logps/real": -431.6222839355469, "loss": 0.3505, "loss/gen": 0.00832824781537056, "loss/real": 0.15834376215934753, "rewards/accuracies": 1.0, "rewards/generated": -714.090576171875, "rewards/margins": 745.2202758789062, "rewards/real": 31.129711151123047, "step": 287 }, { "epoch": 0.6, "grad_norm": 28.628067052187287, "learning_rate": 2.0459078725514089e-07, "logits/generated": 1.6910818815231323, "logits/oppo_generated": -2.6388208866119385, "logits/oppo_real": -2.623079538345337, "logits/real": -1.9768972396850586, "logps/generated": -802.6104736328125, "logps/oppo_gen": -63.859310150146484, "logps/oppo_real": -286.3548889160156, "logps/real": -259.7296142578125, "loss": 0.4453, "loss/gen": 0.025440678000450134, "loss/real": 0.1741112470626831, "rewards/accuracies": 1.0, "rewards/generated": -738.7510986328125, "rewards/margins": 765.3764038085938, "rewards/real": 26.625244140625, "step": 288 }, { "epoch": 0.6, "grad_norm": 58.26362627303912, "learning_rate": 2.027958905135349e-07, "logits/generated": 0.4634339213371277, "logits/oppo_generated": -2.668745517730713, "logits/oppo_real": -2.7086033821105957, "logits/real": -2.0303690433502197, "logps/generated": -743.83544921875, "logps/oppo_gen": -85.07185363769531, "logps/oppo_real": -261.210205078125, "logps/real": -244.32540893554688, "loss": 0.3133, "loss/gen": 0.026636935770511627, "loss/real": 0.1779983788728714, "rewards/accuracies": 1.0, "rewards/generated": -658.7635498046875, "rewards/margins": 675.6484375, "rewards/real": 16.884801864624023, "step": 289 }, { "epoch": 0.61, "grad_norm": 29.702995544825736, "learning_rate": 2.0100351342479216e-07, "logits/generated": 0.3994565010070801, "logits/oppo_generated": -2.7590723037719727, "logits/oppo_real": -2.840005874633789, "logits/real": -2.039400100708008, "logps/generated": -675.1010131835938, "logps/oppo_gen": -75.13490295410156, "logps/oppo_real": -314.7492370605469, "logps/real": -311.7596435546875, "loss": 0.3358, "loss/gen": 0.09637254476547241, "loss/real": 0.2566570043563843, "rewards/accuracies": 0.9375, "rewards/generated": -599.9661865234375, "rewards/margins": 602.9556884765625, "rewards/real": 2.989558219909668, "step": 290 }, { "epoch": 0.61, "grad_norm": 58.494534750719446, "learning_rate": 1.9921375166210945e-07, "logits/generated": 0.22771090269088745, "logits/oppo_generated": -2.8160781860351562, "logits/oppo_real": -2.926997184753418, "logits/real": -1.7282150983810425, "logps/generated": -617.6701049804688, "logps/oppo_gen": -77.46833801269531, "logps/oppo_real": -310.57672119140625, "logps/real": -323.9449462890625, "loss": 0.3855, "loss/gen": 0.17424950003623962, "loss/real": 0.40698057413101196, "rewards/accuracies": 0.875, "rewards/generated": -540.2017822265625, "rewards/margins": 526.8335571289062, "rewards/real": -13.368255615234375, "step": 291 }, { "epoch": 0.61, "grad_norm": 34.476149123683136, "learning_rate": 1.9742670075908349e-07, "logits/generated": 0.3010374903678894, "logits/oppo_generated": -2.6675429344177246, "logits/oppo_real": -2.6383228302001953, "logits/real": -1.7668535709381104, "logps/generated": -607.7005615234375, "logps/oppo_gen": -62.1541748046875, "logps/oppo_real": -173.223388671875, "logps/real": -186.780029296875, "loss": 0.3663, "loss/gen": 0.16208486258983612, "loss/real": 0.2849530577659607, "rewards/accuracies": 1.0, "rewards/generated": -545.54638671875, "rewards/margins": 531.98974609375, "rewards/real": -13.556650161743164, "step": 292 }, { "epoch": 0.61, "grad_norm": 36.1219215005324, "learning_rate": 1.9564245610461078e-07, "logits/generated": -0.7002210021018982, "logits/oppo_generated": -2.663135528564453, "logits/oppo_real": -2.7156057357788086, "logits/real": -1.9735496044158936, "logps/generated": -620.8001098632812, "logps/oppo_gen": -81.85824584960938, "logps/oppo_real": -176.90597534179688, "logps/real": -168.76197814941406, "loss": 0.3097, "loss/gen": 0.1982150822877884, "loss/real": 0.19656775891780853, "rewards/accuracies": 1.0, "rewards/generated": -538.94189453125, "rewards/margins": 547.0858764648438, "rewards/real": 8.144001007080078, "step": 293 }, { "epoch": 0.62, "grad_norm": 38.90704849833205, "learning_rate": 1.938611129377967e-07, "logits/generated": -0.833325982093811, "logits/oppo_generated": -2.8089256286621094, "logits/oppo_real": -3.007702112197876, "logits/real": -2.1018872261047363, "logps/generated": -722.702392578125, "logps/oppo_gen": -64.71053314208984, "logps/oppo_real": -203.855224609375, "logps/real": -210.36614990234375, "loss": 0.3285, "loss/gen": 0.09308800101280212, "loss/real": 0.2549854516983032, "rewards/accuracies": 1.0, "rewards/generated": -657.9918823242188, "rewards/margins": 651.48095703125, "rewards/real": -6.510924339294434, "step": 294 }, { "epoch": 0.62, "grad_norm": 67.77022011455327, "learning_rate": 1.920827663428714e-07, "logits/generated": -0.717969536781311, "logits/oppo_generated": -2.7657203674316406, "logits/oppo_real": -2.7387871742248535, "logits/real": -2.205482006072998, "logps/generated": -609.5360717773438, "logps/oppo_gen": -70.42521667480469, "logps/oppo_real": -208.49960327148438, "logps/real": -188.78085327148438, "loss": 0.2982, "loss/gen": 0.0571298748254776, "loss/real": 0.17368248105049133, "rewards/accuracies": 1.0, "rewards/generated": -539.1109008789062, "rewards/margins": 558.82958984375, "rewards/real": 19.718734741210938, "step": 295 }, { "epoch": 0.62, "grad_norm": 51.58006802989649, "learning_rate": 1.9030751124411448e-07, "logits/generated": 0.027949482202529907, "logits/oppo_generated": -2.8746414184570312, "logits/oppo_real": -2.7726399898529053, "logits/real": -2.319335460662842, "logps/generated": -620.2716674804688, "logps/oppo_gen": -74.33796691894531, "logps/oppo_real": -340.18280029296875, "logps/real": -341.213134765625, "loss": 0.4032, "loss/gen": 0.15857474505901337, "loss/real": 0.33517903089523315, "rewards/accuracies": 1.0, "rewards/generated": -545.9337158203125, "rewards/margins": 544.9034423828125, "rewards/real": -1.030303955078125, "step": 296 }, { "epoch": 0.62, "grad_norm": 27.70785924367286, "learning_rate": 1.8853544240078836e-07, "logits/generated": 0.41290417313575745, "logits/oppo_generated": -2.5701441764831543, "logits/oppo_real": -2.689274311065674, "logits/real": -1.8879668712615967, "logps/generated": -677.0875244140625, "logps/oppo_gen": -80.02645874023438, "logps/oppo_real": -254.08285522460938, "logps/real": -222.24395751953125, "loss": 0.3388, "loss/gen": 0.09368044137954712, "loss/real": 0.15713843703269958, "rewards/accuracies": 1.0, "rewards/generated": -597.06103515625, "rewards/margins": 628.8999633789062, "rewards/real": 31.83889389038086, "step": 297 }, { "epoch": 0.62, "grad_norm": 23.666014899650275, "learning_rate": 1.8676665440207977e-07, "logits/generated": -0.5900826454162598, "logits/oppo_generated": -2.7572181224823, "logits/oppo_real": -2.813715696334839, "logits/real": -2.052342414855957, "logps/generated": -579.000732421875, "logps/oppo_gen": -73.36943817138672, "logps/oppo_real": -282.2958679199219, "logps/real": -263.3672180175781, "loss": 0.2496, "loss/gen": 0.19655288755893707, "loss/real": 0.1768965721130371, "rewards/accuracies": 0.9375, "rewards/generated": -505.6312255859375, "rewards/margins": 524.5599365234375, "rewards/real": 18.92863655090332, "step": 298 }, { "epoch": 0.63, "grad_norm": 30.369390275780255, "learning_rate": 1.850012416620515e-07, "logits/generated": 0.3133728504180908, "logits/oppo_generated": -2.6170716285705566, "logits/oppo_real": -2.797962188720703, "logits/real": -1.5772523880004883, "logps/generated": -602.629638671875, "logps/oppo_gen": -57.24889373779297, "logps/oppo_real": -146.9036865234375, "logps/real": -174.32469177246094, "loss": 0.2935, "loss/gen": 0.060614556074142456, "loss/real": 0.3480251729488373, "rewards/accuracies": 1.0, "rewards/generated": -545.3807373046875, "rewards/margins": 517.9597778320312, "rewards/real": -27.420988082885742, "step": 299 }, { "epoch": 0.63, "grad_norm": 42.82485707844292, "learning_rate": 1.8323929841460178e-07, "logits/generated": 1.209000825881958, "logits/oppo_generated": -2.8536508083343506, "logits/oppo_real": -2.8496203422546387, "logits/real": -2.18961238861084, "logps/generated": -718.6183471679688, "logps/oppo_gen": -65.0604476928711, "logps/oppo_real": -244.0458526611328, "logps/real": -218.15106201171875, "loss": 0.3998, "loss/gen": 0.022837601602077484, "loss/real": 0.16522105038166046, "rewards/accuracies": 1.0, "rewards/generated": -653.557861328125, "rewards/margins": 679.45263671875, "rewards/real": 25.894800186157227, "step": 300 }, { "epoch": 0.63, "grad_norm": 24.93317607211261, "learning_rate": 1.8148091870843552e-07, "logits/generated": 0.9332234263420105, "logits/oppo_generated": -2.770528793334961, "logits/oppo_real": -2.8234052658081055, "logits/real": -2.085968494415283, "logps/generated": -644.9743041992188, "logps/oppo_gen": -78.03842163085938, "logps/oppo_real": -221.58538818359375, "logps/real": -202.37620544433594, "loss": 0.2559, "loss/gen": 0.16635200381278992, "loss/real": 0.1705245077610016, "rewards/accuracies": 1.0, "rewards/generated": -566.9358520507812, "rewards/margins": 586.14501953125, "rewards/real": 19.20915412902832, "step": 301 }, { "epoch": 0.63, "grad_norm": 26.63787215432487, "learning_rate": 1.7972619640204294e-07, "logits/generated": 1.1756725311279297, "logits/oppo_generated": -2.713470935821533, "logits/oppo_real": -2.8412275314331055, "logits/real": -2.0337021350860596, "logps/generated": -819.2066650390625, "logps/oppo_gen": -77.62155151367188, "logps/oppo_real": -245.73736572265625, "logps/real": -243.28323364257812, "loss": 0.2739, "loss/gen": 0.032348792999982834, "loss/real": 0.2555205523967743, "rewards/accuracies": 1.0, "rewards/generated": -741.5850830078125, "rewards/margins": 744.0391845703125, "rewards/real": 2.454113006591797, "step": 302 }, { "epoch": 0.63, "grad_norm": 41.888609903510016, "learning_rate": 1.779752251586906e-07, "logits/generated": 0.9290653467178345, "logits/oppo_generated": -2.5343804359436035, "logits/oppo_real": -2.5752387046813965, "logits/real": -0.9488723278045654, "logps/generated": -612.5504150390625, "logps/oppo_gen": -86.0159912109375, "logps/oppo_real": -196.22686767578125, "logps/real": -231.4734344482422, "loss": 0.3814, "loss/gen": 0.2599642276763916, "loss/real": 0.3707355856895447, "rewards/accuracies": 0.875, "rewards/generated": -526.534423828125, "rewards/margins": 491.287841796875, "rewards/real": -35.24655532836914, "step": 303 }, { "epoch": 0.64, "grad_norm": 44.957284956980274, "learning_rate": 1.7622809844142137e-07, "logits/generated": 0.5650679469108582, "logits/oppo_generated": -2.718064308166504, "logits/oppo_real": -2.7589216232299805, "logits/real": -1.9737653732299805, "logps/generated": -726.685791015625, "logps/oppo_gen": -87.50894165039062, "logps/oppo_real": -237.13243103027344, "logps/real": -236.24334716796875, "loss": 0.3701, "loss/gen": 0.10715784132480621, "loss/real": 0.2832576632499695, "rewards/accuracies": 1.0, "rewards/generated": -639.1767578125, "rewards/margins": 640.0657958984375, "rewards/real": 0.8890562057495117, "step": 304 }, { "epoch": 0.64, "grad_norm": 31.1571266410647, "learning_rate": 1.7448490950806548e-07, "logits/generated": 0.5373824834823608, "logits/oppo_generated": -2.698071002960205, "logits/oppo_real": -2.8672518730163574, "logits/real": -1.855992317199707, "logps/generated": -874.5352783203125, "logps/oppo_gen": -63.30276107788086, "logps/oppo_real": -260.84515380859375, "logps/real": -251.75595092773438, "loss": 0.2728, "loss/gen": 0.1308051496744156, "loss/real": 0.2010972499847412, "rewards/accuracies": 1.0, "rewards/generated": -811.2324829101562, "rewards/margins": 820.3217163085938, "rewards/real": 9.089208602905273, "step": 305 }, { "epoch": 0.64, "grad_norm": 29.924091179351734, "learning_rate": 1.7274575140626315e-07, "logits/generated": 2.4331858158111572, "logits/oppo_generated": -2.5791516304016113, "logits/oppo_real": -2.4137372970581055, "logits/real": -1.069727897644043, "logps/generated": -593.718994140625, "logps/oppo_gen": -62.98029708862305, "logps/oppo_real": -136.33953857421875, "logps/real": -150.31948852539062, "loss": 0.3726, "loss/gen": 0.06495876610279083, "loss/real": 0.28633958101272583, "rewards/accuracies": 0.9375, "rewards/generated": -530.7386474609375, "rewards/margins": 516.7587280273438, "rewards/real": -13.979955673217773, "step": 306 }, { "epoch": 0.64, "grad_norm": 55.12626885231872, "learning_rate": 1.7101071696849718e-07, "logits/generated": 0.914715588092804, "logits/oppo_generated": -2.8061037063598633, "logits/oppo_real": -2.7929673194885254, "logits/real": -1.7518253326416016, "logps/generated": -656.9697875976562, "logps/oppo_gen": -62.40575408935547, "logps/oppo_real": -266.6053161621094, "logps/real": -290.57000732421875, "loss": 0.4364, "loss/gen": 0.0519477054476738, "loss/real": 0.4437403678894043, "rewards/accuracies": 1.0, "rewards/generated": -594.5640869140625, "rewards/margins": 570.5994262695312, "rewards/real": -23.96465301513672, "step": 307 }, { "epoch": 0.64, "grad_norm": 37.763811628992116, "learning_rate": 1.692798988071385e-07, "logits/generated": 1.7852463722229004, "logits/oppo_generated": -2.7456932067871094, "logits/oppo_real": -2.709989309310913, "logits/real": -1.7187665700912476, "logps/generated": -818.27587890625, "logps/oppo_gen": -61.88515853881836, "logps/oppo_real": -247.7135467529297, "logps/real": -256.1959228515625, "loss": 0.3584, "loss/gen": 0.023114312440156937, "loss/real": 0.313579261302948, "rewards/accuracies": 0.9375, "rewards/generated": -756.3906860351562, "rewards/margins": 747.9083251953125, "rewards/real": -8.482393264770508, "step": 308 }, { "epoch": 0.65, "grad_norm": 38.874349999590024, "learning_rate": 1.6755338930950192e-07, "logits/generated": -0.14025013148784637, "logits/oppo_generated": -2.773202419281006, "logits/oppo_real": -2.869076728820801, "logits/real": -2.143603563308716, "logps/generated": -687.7984008789062, "logps/oppo_gen": -77.8241195678711, "logps/oppo_real": -341.8100280761719, "logps/real": -303.91571044921875, "loss": 0.2404, "loss/gen": 0.0828854888677597, "loss/real": 0.1460573673248291, "rewards/accuracies": 1.0, "rewards/generated": -609.9743041992188, "rewards/margins": 647.86865234375, "rewards/real": 37.89434814453125, "step": 309 }, { "epoch": 0.65, "grad_norm": 24.508315563054946, "learning_rate": 1.6583128063291573e-07, "logits/generated": 0.9043882489204407, "logits/oppo_generated": -2.831587791442871, "logits/oppo_real": -2.906121253967285, "logits/real": -2.295949697494507, "logps/generated": -736.5703125, "logps/oppo_gen": -70.72431945800781, "logps/oppo_real": -306.6643981933594, "logps/real": -273.4691162109375, "loss": 0.2844, "loss/gen": 0.03372867777943611, "loss/real": 0.14928202331066132, "rewards/accuracies": 1.0, "rewards/generated": -665.8460693359375, "rewards/margins": 699.041259765625, "rewards/real": 33.19529724121094, "step": 310 }, { "epoch": 0.65, "grad_norm": 25.9435328074503, "learning_rate": 1.6411366469980134e-07, "logits/generated": -0.18510423600673676, "logits/oppo_generated": -2.8098537921905518, "logits/oppo_real": -2.916290521621704, "logits/real": -2.3596861362457275, "logps/generated": -769.0659790039062, "logps/oppo_gen": -89.95899963378906, "logps/oppo_real": -356.05450439453125, "logps/real": -323.37353515625, "loss": 0.3019, "loss/gen": 0.023290330544114113, "loss/real": 0.15135399997234344, "rewards/accuracies": 1.0, "rewards/generated": -679.10693359375, "rewards/margins": 711.7879638671875, "rewards/real": 32.680999755859375, "step": 311 }, { "epoch": 0.65, "grad_norm": 38.68965658114824, "learning_rate": 1.6240063319276764e-07, "logits/generated": 0.11848394572734833, "logits/oppo_generated": -2.7743453979492188, "logits/oppo_real": -2.6186952590942383, "logits/real": -1.9814376831054688, "logps/generated": -694.65185546875, "logps/oppo_gen": -78.62828063964844, "logps/oppo_real": -246.07437133789062, "logps/real": -253.11190795898438, "loss": 0.3493, "loss/gen": 0.07453414052724838, "loss/real": 0.318087637424469, "rewards/accuracies": 0.9375, "rewards/generated": -616.0235595703125, "rewards/margins": 608.9860229492188, "rewards/real": -7.037555694580078, "step": 312 }, { "epoch": 0.65, "grad_norm": 38.516396734154824, "learning_rate": 1.606922775497168e-07, "logits/generated": -0.19997042417526245, "logits/oppo_generated": -2.8177504539489746, "logits/oppo_real": -2.7674360275268555, "logits/real": -2.163430690765381, "logps/generated": -664.20068359375, "logps/oppo_gen": -76.41138458251953, "logps/oppo_real": -294.525634765625, "logps/real": -269.033935546875, "loss": 0.2597, "loss/gen": 0.042983490973711014, "loss/real": 0.16210412979125977, "rewards/accuracies": 1.0, "rewards/generated": -587.789306640625, "rewards/margins": 613.281005859375, "rewards/real": 25.491722106933594, "step": 313 }, { "epoch": 0.66, "grad_norm": 25.751603456220085, "learning_rate": 1.5898868895896332e-07, "logits/generated": 0.7687809467315674, "logits/oppo_generated": -2.704318046569824, "logits/oppo_real": -2.5222792625427246, "logits/real": -2.074397325515747, "logps/generated": -562.7529296875, "logps/oppo_gen": -56.72496032714844, "logps/oppo_real": -257.3275451660156, "logps/real": -243.65383911132812, "loss": 0.2979, "loss/gen": 0.06320726126432419, "loss/real": 0.2052139937877655, "rewards/accuracies": 1.0, "rewards/generated": -506.0279235839844, "rewards/margins": 519.70166015625, "rewards/real": 13.673715591430664, "step": 314 }, { "epoch": 0.66, "grad_norm": 19.927441011666236, "learning_rate": 1.572899583543671e-07, "logits/generated": -0.32504546642303467, "logits/oppo_generated": -2.8547208309173584, "logits/oppo_real": -3.0012588500976562, "logits/real": -2.420483112335205, "logps/generated": -661.608642578125, "logps/oppo_gen": -89.96110534667969, "logps/oppo_real": -324.48052978515625, "logps/real": -324.35284423828125, "loss": 0.302, "loss/gen": 0.042295072227716446, "loss/real": 0.25394028425216675, "rewards/accuracies": 1.0, "rewards/generated": -571.6475830078125, "rewards/margins": 571.7752685546875, "rewards/real": 0.12767601013183594, "step": 315 }, { "epoch": 0.66, "grad_norm": 47.51428907197944, "learning_rate": 1.5559617641047885e-07, "logits/generated": -0.6027528643608093, "logits/oppo_generated": -2.6286120414733887, "logits/oppo_real": -2.6878955364227295, "logits/real": -2.110302448272705, "logps/generated": -716.825439453125, "logps/oppo_gen": -84.58811950683594, "logps/oppo_real": -344.87701416015625, "logps/real": -320.06689453125, "loss": 0.3296, "loss/gen": 0.036832332611083984, "loss/real": 0.18904441595077515, "rewards/accuracies": 1.0, "rewards/generated": -632.2373046875, "rewards/margins": 657.04736328125, "rewards/real": 24.81012725830078, "step": 316 }, { "epoch": 0.66, "grad_norm": 37.112358432704056, "learning_rate": 1.5390743353770108e-07, "logits/generated": -0.28523945808410645, "logits/oppo_generated": -2.688891887664795, "logits/oppo_real": -2.580460548400879, "logits/real": -2.2062671184539795, "logps/generated": -614.7840576171875, "logps/oppo_gen": -74.83438110351562, "logps/oppo_real": -342.4462890625, "logps/real": -363.833984375, "loss": 0.3025, "loss/gen": 0.03874469920992851, "loss/real": 0.3939588665962219, "rewards/accuracies": 0.9375, "rewards/generated": -539.94970703125, "rewards/margins": 518.5621337890625, "rewards/real": -21.387653350830078, "step": 317 }, { "epoch": 0.67, "grad_norm": 19.430635374697687, "learning_rate": 1.5222381987746102e-07, "logits/generated": 0.6181946396827698, "logits/oppo_generated": -2.5959243774414062, "logits/oppo_real": -2.7141470909118652, "logits/real": -1.8983464241027832, "logps/generated": -678.6424560546875, "logps/oppo_gen": -63.21453857421875, "logps/oppo_real": -254.0829620361328, "logps/real": -251.00709533691406, "loss": 0.2389, "loss/gen": 0.03314092755317688, "loss/real": 0.27147579193115234, "rewards/accuracies": 1.0, "rewards/generated": -615.4279174804688, "rewards/margins": 618.5037841796875, "rewards/real": 3.075847625732422, "step": 318 }, { "epoch": 0.67, "grad_norm": 49.08751352813666, "learning_rate": 1.5054542529740008e-07, "logits/generated": 0.9991955161094666, "logits/oppo_generated": -2.704498052597046, "logits/oppo_real": -2.7787587642669678, "logits/real": -2.1585323810577393, "logps/generated": -607.084228515625, "logps/oppo_gen": -58.10862731933594, "logps/oppo_real": -242.32754516601562, "logps/real": -258.293701171875, "loss": 0.3173, "loss/gen": 0.08082648366689682, "loss/real": 0.38607895374298096, "rewards/accuracies": 0.9375, "rewards/generated": -548.9755859375, "rewards/margins": 533.0094604492188, "rewards/real": -15.966143608093262, "step": 319 }, { "epoch": 0.67, "grad_norm": 55.92749343108785, "learning_rate": 1.488723393865766e-07, "logits/generated": -0.43444064259529114, "logits/oppo_generated": -2.8027491569519043, "logits/oppo_real": -2.7032413482666016, "logits/real": -2.063422203063965, "logps/generated": -628.6464233398438, "logps/oppo_gen": -73.10235595703125, "logps/oppo_real": -248.4849853515625, "logps/real": -226.80807495117188, "loss": 0.3594, "loss/gen": 0.17635869979858398, "loss/real": 0.16784903407096863, "rewards/accuracies": 1.0, "rewards/generated": -555.5441284179688, "rewards/margins": 577.2210693359375, "rewards/real": 21.676923751831055, "step": 320 }, { "epoch": 0.67, "grad_norm": 25.093725677136405, "learning_rate": 1.472046514506832e-07, "logits/generated": 1.7986483573913574, "logits/oppo_generated": -2.8599579334259033, "logits/oppo_real": -2.651264190673828, "logits/real": -2.081268787384033, "logps/generated": -733.37744140625, "logps/oppo_gen": -88.89913940429688, "logps/oppo_real": -204.5109405517578, "logps/real": -176.57421875, "loss": 0.313, "loss/gen": 0.029537349939346313, "loss/real": 0.1576533019542694, "rewards/accuracies": 1.0, "rewards/generated": -644.478271484375, "rewards/margins": 672.4150390625, "rewards/real": 27.93672752380371, "step": 321 }, { "epoch": 0.67, "grad_norm": 37.83629481170328, "learning_rate": 1.4554245050728084e-07, "logits/generated": 0.15214265882968903, "logits/oppo_generated": -2.7803587913513184, "logits/oppo_real": -2.8964691162109375, "logits/real": -2.03551983833313, "logps/generated": -604.7355346679688, "logps/oppo_gen": -65.7453384399414, "logps/oppo_real": -243.90567016601562, "logps/real": -237.9889678955078, "loss": 0.3164, "loss/gen": 0.10329093784093857, "loss/real": 0.22754207253456116, "rewards/accuracies": 1.0, "rewards/generated": -538.990234375, "rewards/margins": 544.9068603515625, "rewards/real": 5.916713714599609, "step": 322 }, { "epoch": 0.68, "grad_norm": 38.617218320086835, "learning_rate": 1.4388582528104627e-07, "logits/generated": 0.374458372592926, "logits/oppo_generated": -2.3750133514404297, "logits/oppo_real": -2.4892232418060303, "logits/real": -1.7002949714660645, "logps/generated": -648.1957397460938, "logps/oppo_gen": -89.67547607421875, "logps/oppo_real": -299.82208251953125, "logps/real": -309.90496826171875, "loss": 0.3357, "loss/gen": 0.21774569153785706, "loss/real": 0.3294346332550049, "rewards/accuracies": 1.0, "rewards/generated": -558.520263671875, "rewards/margins": 548.4373779296875, "rewards/real": -10.082886695861816, "step": 323 }, { "epoch": 0.68, "grad_norm": 64.62729686440552, "learning_rate": 1.422348641990369e-07, "logits/generated": 0.43694326281547546, "logits/oppo_generated": -2.5319583415985107, "logits/oppo_real": -2.6248130798339844, "logits/real": -1.630281686782837, "logps/generated": -490.3382873535156, "logps/oppo_gen": -65.50686645507812, "logps/oppo_real": -199.474853515625, "logps/real": -189.73822021484375, "loss": 0.3563, "loss/gen": 0.37786510586738586, "loss/real": 0.21018007397651672, "rewards/accuracies": 1.0, "rewards/generated": -424.8314208984375, "rewards/margins": 434.56805419921875, "rewards/real": 9.736638069152832, "step": 324 }, { "epoch": 0.68, "grad_norm": 62.558780952147316, "learning_rate": 1.4058965538597032e-07, "logits/generated": 0.40667301416397095, "logits/oppo_generated": -2.3989098072052, "logits/oppo_real": -2.523897171020508, "logits/real": -1.7344365119934082, "logps/generated": -598.157470703125, "logps/oppo_gen": -62.83578109741211, "logps/oppo_real": -293.08514404296875, "logps/real": -234.18716430664062, "loss": 0.3112, "loss/gen": 0.15413987636566162, "loss/real": 0.14755874872207642, "rewards/accuracies": 1.0, "rewards/generated": -535.3216552734375, "rewards/margins": 594.2197265625, "rewards/real": 58.897987365722656, "step": 325 }, { "epoch": 0.68, "grad_norm": 20.831581876247345, "learning_rate": 1.3895028665952057e-07, "logits/generated": -0.0944506824016571, "logits/oppo_generated": -2.80672025680542, "logits/oppo_real": -2.7922816276550293, "logits/real": -2.3034706115722656, "logps/generated": -768.2994384765625, "logps/oppo_gen": -82.2605972290039, "logps/oppo_real": -276.97515869140625, "logps/real": -247.0251922607422, "loss": 0.2874, "loss/gen": 0.016872398555278778, "loss/real": 0.153561532497406, "rewards/accuracies": 1.0, "rewards/generated": -686.038818359375, "rewards/margins": 715.9888305664062, "rewards/real": 29.94999122619629, "step": 326 }, { "epoch": 0.68, "grad_norm": 45.22126591134972, "learning_rate": 1.3731684552563027e-07, "logits/generated": -0.44738227128982544, "logits/oppo_generated": -2.735718250274658, "logits/oppo_real": -2.7187423706054688, "logits/real": -2.1731090545654297, "logps/generated": -783.983154296875, "logps/oppo_gen": -86.40644836425781, "logps/oppo_real": -320.1417236328125, "logps/real": -296.46685791015625, "loss": 0.341, "loss/gen": 0.11403728276491165, "loss/real": 0.1636250913143158, "rewards/accuracies": 1.0, "rewards/generated": -697.57666015625, "rewards/margins": 721.25146484375, "rewards/real": 23.674882888793945, "step": 327 }, { "epoch": 0.69, "grad_norm": 38.26118574954176, "learning_rate": 1.3568941917384036e-07, "logits/generated": 0.22915993630886078, "logits/oppo_generated": -2.673722267150879, "logits/oppo_real": -2.721848964691162, "logits/real": -1.9900763034820557, "logps/generated": -759.6171264648438, "logps/oppo_gen": -78.38516235351562, "logps/oppo_real": -173.74859619140625, "logps/real": -193.63247680664062, "loss": 0.3319, "loss/gen": 0.11070828139781952, "loss/real": 0.3567718267440796, "rewards/accuracies": 1.0, "rewards/generated": -681.23193359375, "rewards/margins": 661.3480834960938, "rewards/real": -19.883878707885742, "step": 328 }, { "epoch": 0.69, "grad_norm": 29.444253264312355, "learning_rate": 1.3406809447263568e-07, "logits/generated": -0.4770505428314209, "logits/oppo_generated": -2.7386703491210938, "logits/oppo_real": -2.7967844009399414, "logits/real": -2.254525899887085, "logps/generated": -660.37744140625, "logps/oppo_gen": -91.2105712890625, "logps/oppo_real": -351.5929870605469, "logps/real": -323.23577880859375, "loss": 0.3176, "loss/gen": 0.12337271124124527, "loss/real": 0.15685990452766418, "rewards/accuracies": 1.0, "rewards/generated": -569.1668701171875, "rewards/margins": 597.5240478515625, "rewards/real": 28.35721206665039, "step": 329 }, { "epoch": 0.69, "grad_norm": 31.31519761333944, "learning_rate": 1.3245295796480788e-07, "logits/generated": 0.2074773609638214, "logits/oppo_generated": -2.814140796661377, "logits/oppo_real": -2.840679883956909, "logits/real": -2.250474452972412, "logps/generated": -677.9022216796875, "logps/oppo_gen": -80.07135009765625, "logps/oppo_real": -288.6474304199219, "logps/real": -282.9855041503906, "loss": 0.3606, "loss/gen": 0.05122773349285126, "loss/real": 0.22256958484649658, "rewards/accuracies": 1.0, "rewards/generated": -597.8309326171875, "rewards/margins": 603.492919921875, "rewards/real": 5.661938190460205, "step": 330 }, { "epoch": 0.69, "grad_norm": 35.277156676851455, "learning_rate": 1.3084409586283694e-07, "logits/generated": 0.09115093946456909, "logits/oppo_generated": -2.589115858078003, "logits/oppo_real": -2.5021204948425293, "logits/real": -2.0292158126831055, "logps/generated": -757.2649536132812, "logps/oppo_gen": -103.41107177734375, "logps/oppo_real": -336.0791320800781, "logps/real": -308.29779052734375, "loss": 0.2886, "loss/gen": 0.0286662969738245, "loss/real": 0.15769243240356445, "rewards/accuracies": 1.0, "rewards/generated": -653.8538818359375, "rewards/margins": 681.6351928710938, "rewards/real": 27.78131103515625, "step": 331 }, { "epoch": 0.69, "grad_norm": 39.39202106064358, "learning_rate": 1.2924159404428801e-07, "logits/generated": 0.2052621841430664, "logits/oppo_generated": -2.5180981159210205, "logits/oppo_real": -2.463376045227051, "logits/real": -1.8674006462097168, "logps/generated": -663.0791015625, "logps/oppo_gen": -77.07872009277344, "logps/oppo_real": -306.44830322265625, "logps/real": -267.9314270019531, "loss": 0.304, "loss/gen": 0.15333253145217896, "loss/real": 0.17717936635017395, "rewards/accuracies": 0.9375, "rewards/generated": -586.0004272460938, "rewards/margins": 624.517333984375, "rewards/real": 38.516883850097656, "step": 332 }, { "epoch": 0.7, "grad_norm": 26.922403234080672, "learning_rate": 1.2764553804722867e-07, "logits/generated": 0.24261078238487244, "logits/oppo_generated": -2.7135229110717773, "logits/oppo_real": -2.7584338188171387, "logits/real": -2.0417680740356445, "logps/generated": -574.452880859375, "logps/oppo_gen": -63.32374572753906, "logps/oppo_real": -163.67233276367188, "logps/real": -160.25054931640625, "loss": 0.3333, "loss/gen": 0.17099058628082275, "loss/real": 0.23388735949993134, "rewards/accuracies": 1.0, "rewards/generated": -511.129150390625, "rewards/margins": 514.5509033203125, "rewards/real": 3.4218015670776367, "step": 333 }, { "epoch": 0.7, "grad_norm": 27.14057664971226, "learning_rate": 1.2605601306566204e-07, "logits/generated": 0.07767532020807266, "logits/oppo_generated": -2.9221487045288086, "logits/oppo_real": -3.001983165740967, "logits/real": -2.4487013816833496, "logps/generated": -646.2496337890625, "logps/oppo_gen": -80.82681274414062, "logps/oppo_real": -307.2742919921875, "logps/real": -280.507080078125, "loss": 0.2789, "loss/gen": 0.042075518518686295, "loss/real": 0.15917706489562988, "rewards/accuracies": 1.0, "rewards/generated": -565.4228515625, "rewards/margins": 592.1900634765625, "rewards/real": 26.767179489135742, "step": 334 }, { "epoch": 0.7, "grad_norm": 27.50873995525388, "learning_rate": 1.2447310394498017e-07, "logits/generated": 0.4118821918964386, "logits/oppo_generated": -2.5506510734558105, "logits/oppo_real": -2.6206767559051514, "logits/real": -1.6342732906341553, "logps/generated": -726.49755859375, "logps/oppo_gen": -65.73345947265625, "logps/oppo_real": -223.48028564453125, "logps/real": -207.2204132080078, "loss": 0.3043, "loss/gen": 0.024275805801153183, "loss/real": 0.2104792445898056, "rewards/accuracies": 1.0, "rewards/generated": -660.76416015625, "rewards/margins": 677.0239868164062, "rewards/real": 16.259868621826172, "step": 335 }, { "epoch": 0.7, "grad_norm": 43.03536881159496, "learning_rate": 1.2289689517743472e-07, "logits/generated": 0.2103143334388733, "logits/oppo_generated": -2.7872653007507324, "logits/oppo_real": -2.8988583087921143, "logits/real": -2.260883092880249, "logps/generated": -628.6256103515625, "logps/oppo_gen": -63.961341857910156, "logps/oppo_real": -180.2196044921875, "logps/real": -161.49661254882812, "loss": 0.274, "loss/gen": 0.06484713405370712, "loss/real": 0.17386212944984436, "rewards/accuracies": 1.0, "rewards/generated": -564.6642456054688, "rewards/margins": 583.3872680664062, "rewards/real": 18.722999572753906, "step": 336 }, { "epoch": 0.71, "grad_norm": 35.90554285639614, "learning_rate": 1.213274708976271e-07, "logits/generated": 0.4222411513328552, "logits/oppo_generated": -2.6874637603759766, "logits/oppo_real": -2.745856761932373, "logits/real": -1.9721487760543823, "logps/generated": -692.657958984375, "logps/oppo_gen": -80.49569702148438, "logps/oppo_real": -223.6146240234375, "logps/real": -244.1184844970703, "loss": 0.385, "loss/gen": 0.04486394301056862, "loss/real": 0.4462623596191406, "rewards/accuracies": 0.9375, "rewards/generated": -612.1622314453125, "rewards/margins": 591.6583251953125, "rewards/real": -20.50387191772461, "step": 337 }, { "epoch": 0.71, "grad_norm": 42.34218026380063, "learning_rate": 1.1976491487801746e-07, "logits/generated": 0.6293839812278748, "logits/oppo_generated": -2.5948119163513184, "logits/oppo_real": -2.5506985187530518, "logits/real": -1.8280091285705566, "logps/generated": -798.2767333984375, "logps/oppo_gen": -65.61744689941406, "logps/oppo_real": -219.27554321289062, "logps/real": -223.35015869140625, "loss": 0.3611, "loss/gen": 0.14065881073474884, "loss/real": 0.34547215700149536, "rewards/accuracies": 1.0, "rewards/generated": -732.6593017578125, "rewards/margins": 728.584716796875, "rewards/real": -4.07459831237793, "step": 338 }, { "epoch": 0.71, "grad_norm": 31.3995649368372, "learning_rate": 1.1820931052445297e-07, "logits/generated": 1.3286151885986328, "logits/oppo_generated": -2.7529683113098145, "logits/oppo_real": -2.6667518615722656, "logits/real": -1.918984293937683, "logps/generated": -792.933349609375, "logps/oppo_gen": -78.03943634033203, "logps/oppo_real": -245.19813537597656, "logps/real": -264.14959716796875, "loss": 0.3132, "loss/gen": 0.143991157412529, "loss/real": 0.3856561779975891, "rewards/accuracies": 0.9375, "rewards/generated": -714.8939208984375, "rewards/margins": 695.9423828125, "rewards/real": -18.951465606689453, "step": 339 }, { "epoch": 0.71, "grad_norm": 42.54585646503958, "learning_rate": 1.1666074087171627e-07, "logits/generated": 0.2029736191034317, "logits/oppo_generated": -2.6604325771331787, "logits/oppo_real": -2.737955093383789, "logits/real": -2.145906448364258, "logps/generated": -556.6988525390625, "logps/oppo_gen": -53.90046691894531, "logps/oppo_real": -178.48764038085938, "logps/real": -149.980224609375, "loss": 0.2652, "loss/gen": 0.19677889347076416, "loss/real": 0.15636593103408813, "rewards/accuracies": 1.0, "rewards/generated": -502.79840087890625, "rewards/margins": 531.3058471679688, "rewards/real": 28.507417678833008, "step": 340 }, { "epoch": 0.71, "grad_norm": 26.573972706214345, "learning_rate": 1.1511928857909264e-07, "logits/generated": -0.5622884035110474, "logits/oppo_generated": -2.7561309337615967, "logits/oppo_real": -2.7139782905578613, "logits/real": -2.2837305068969727, "logps/generated": -767.5333862304688, "logps/oppo_gen": -94.01554870605469, "logps/oppo_real": -280.5321044921875, "logps/real": -261.78564453125, "loss": 0.2716, "loss/gen": 0.024503124877810478, "loss/real": 0.17600321769714355, "rewards/accuracies": 1.0, "rewards/generated": -673.517822265625, "rewards/margins": 692.2642822265625, "rewards/real": 18.7464599609375, "step": 341 }, { "epoch": 0.72, "grad_norm": 48.54658583354498, "learning_rate": 1.1358503592595837e-07, "logits/generated": 0.8509594202041626, "logits/oppo_generated": -2.813668727874756, "logits/oppo_real": -2.8454103469848633, "logits/real": -2.246967077255249, "logps/generated": -787.0734252929688, "logps/oppo_gen": -83.64794158935547, "logps/oppo_real": -344.2013854980469, "logps/real": -316.9454345703125, "loss": 0.3343, "loss/gen": 0.08634554594755173, "loss/real": 0.16001561284065247, "rewards/accuracies": 1.0, "rewards/generated": -703.425537109375, "rewards/margins": 730.6814575195312, "rewards/real": 27.255950927734375, "step": 342 }, { "epoch": 0.72, "grad_norm": 26.039506681255023, "learning_rate": 1.120580648073885e-07, "logits/generated": 0.20105311274528503, "logits/oppo_generated": -2.6984949111938477, "logits/oppo_real": -2.83193302154541, "logits/real": -1.8173322677612305, "logps/generated": -666.4171142578125, "logps/oppo_gen": -75.18173217773438, "logps/oppo_real": -207.76528930664062, "logps/real": -187.3746795654297, "loss": 0.275, "loss/gen": 0.20881646871566772, "loss/real": 0.180282324552536, "rewards/accuracies": 1.0, "rewards/generated": -591.2354125976562, "rewards/margins": 611.6259765625, "rewards/real": 20.390621185302734, "step": 343 }, { "epoch": 0.72, "grad_norm": 56.60992918235502, "learning_rate": 1.1053845672978565e-07, "logits/generated": 1.0070171356201172, "logits/oppo_generated": -2.7348833084106445, "logits/oppo_real": -2.7071237564086914, "logits/real": -1.6740999221801758, "logps/generated": -795.6455078125, "logps/oppo_gen": -76.39961242675781, "logps/oppo_real": -203.86724853515625, "logps/real": -246.10214233398438, "loss": 0.3551, "loss/gen": 0.02106640115380287, "loss/real": 0.5011658072471619, "rewards/accuracies": 1.0, "rewards/generated": -719.2459106445312, "rewards/margins": 677.0111083984375, "rewards/real": -42.23489761352539, "step": 344 }, { "epoch": 0.72, "grad_norm": 35.033887751941336, "learning_rate": 1.090262928065293e-07, "logits/generated": 0.5341898798942566, "logits/oppo_generated": -2.7704925537109375, "logits/oppo_real": -2.7265381813049316, "logits/real": -1.9755090475082397, "logps/generated": -791.3928833007812, "logps/oppo_gen": -85.65669250488281, "logps/oppo_real": -332.951904296875, "logps/real": -323.0646667480469, "loss": 0.3046, "loss/gen": 0.014689632691442966, "loss/real": 0.23723618686199188, "rewards/accuracies": 1.0, "rewards/generated": -705.7362060546875, "rewards/margins": 715.6234130859375, "rewards/real": 9.887211799621582, "step": 345 }, { "epoch": 0.72, "grad_norm": 68.98002003524282, "learning_rate": 1.0752165375364591e-07, "logits/generated": 0.32197409868240356, "logits/oppo_generated": -2.6534223556518555, "logits/oppo_real": -2.706442356109619, "logits/real": -1.8222732543945312, "logps/generated": -825.9232788085938, "logps/oppo_gen": -88.76399993896484, "logps/oppo_real": -302.44403076171875, "logps/real": -287.33404541015625, "loss": 0.2836, "loss/gen": 0.07158385217189789, "loss/real": 0.23982387781143188, "rewards/accuracies": 1.0, "rewards/generated": -737.1593017578125, "rewards/margins": 752.269287109375, "rewards/real": 15.1099853515625, "step": 346 }, { "epoch": 0.73, "grad_norm": 49.98578411189776, "learning_rate": 1.060246198855011e-07, "logits/generated": 0.27689942717552185, "logits/oppo_generated": -2.723323345184326, "logits/oppo_real": -2.7297825813293457, "logits/real": -1.9392592906951904, "logps/generated": -671.2532958984375, "logps/oppo_gen": -64.74773406982422, "logps/oppo_real": -236.87423706054688, "logps/real": -226.2437744140625, "loss": 0.284, "loss/gen": 0.04255779832601547, "loss/real": 0.24799221754074097, "rewards/accuracies": 1.0, "rewards/generated": -606.5055541992188, "rewards/margins": 617.135986328125, "rewards/real": 10.630456924438477, "step": 347 }, { "epoch": 0.73, "grad_norm": 26.12380146398467, "learning_rate": 1.0453527111051183e-07, "logits/generated": 0.04960054159164429, "logits/oppo_generated": -2.8397562503814697, "logits/oppo_real": -2.856273651123047, "logits/real": -2.319634199142456, "logps/generated": -784.1358642578125, "logps/oppo_gen": -91.67137145996094, "logps/oppo_real": -355.03753662109375, "logps/real": -327.039794921875, "loss": 0.3013, "loss/gen": 0.01843111217021942, "loss/real": 0.15837496519088745, "rewards/accuracies": 1.0, "rewards/generated": -692.4644775390625, "rewards/margins": 720.462158203125, "rewards/real": 27.997737884521484, "step": 348 }, { "epoch": 0.73, "grad_norm": 48.17637781014934, "learning_rate": 1.0305368692688174e-07, "logits/generated": 1.1829630136489868, "logits/oppo_generated": -2.7997734546661377, "logits/oppo_real": -2.955216884613037, "logits/real": -2.1545634269714355, "logps/generated": -692.6908569335938, "logps/oppo_gen": -76.2406005859375, "logps/oppo_real": -269.97576904296875, "logps/real": -239.1646728515625, "loss": 0.3035, "loss/gen": 0.019039634615182877, "loss/real": 0.15278419852256775, "rewards/accuracies": 1.0, "rewards/generated": -616.4503173828125, "rewards/margins": 647.2613525390625, "rewards/real": 30.81109619140625, "step": 349 }, { "epoch": 0.73, "grad_norm": 38.29671243845547, "learning_rate": 1.0157994641835734e-07, "logits/generated": 1.381691813468933, "logits/oppo_generated": -2.750364303588867, "logits/oppo_real": -2.6481316089630127, "logits/real": -2.0888710021972656, "logps/generated": -985.8902587890625, "logps/oppo_gen": -67.35143280029297, "logps/oppo_real": -269.8861083984375, "logps/real": -248.64231872558594, "loss": 0.3397, "loss/gen": 0.00863666832447052, "loss/real": 0.17384257912635803, "rewards/accuracies": 1.0, "rewards/generated": -918.538818359375, "rewards/margins": 939.7825927734375, "rewards/real": 21.243799209594727, "step": 350 }, { "epoch": 0.73, "grad_norm": 35.82324568824864, "learning_rate": 1.0011412825000693e-07, "logits/generated": 0.5441445708274841, "logits/oppo_generated": -2.8472161293029785, "logits/oppo_real": -2.733160972595215, "logits/real": -2.123514175415039, "logps/generated": -677.7409057617188, "logps/oppo_gen": -73.3748779296875, "logps/oppo_real": -268.7559509277344, "logps/real": -268.14208984375, "loss": 0.2839, "loss/gen": 0.022950371727347374, "loss/real": 0.2343311309814453, "rewards/accuracies": 1.0, "rewards/generated": -604.3660888671875, "rewards/margins": 604.97998046875, "rewards/real": 0.6138732433319092, "step": 351 }, { "epoch": 0.74, "grad_norm": 34.41360403355227, "learning_rate": 9.865631066402136e-08, "logits/generated": 1.2953248023986816, "logits/oppo_generated": -2.5277109146118164, "logits/oppo_real": -2.492619514465332, "logits/real": -2.0883584022521973, "logps/generated": -750.603515625, "logps/oppo_gen": -61.516632080078125, "logps/oppo_real": -233.42202758789062, "logps/real": -203.82994079589844, "loss": 0.305, "loss/gen": 0.016972701996564865, "loss/real": 0.1576637625694275, "rewards/accuracies": 1.0, "rewards/generated": -689.0869140625, "rewards/margins": 718.678955078125, "rewards/real": 29.592084884643555, "step": 352 }, { "epoch": 0.74, "grad_norm": 85.37379322902608, "learning_rate": 9.720657147553767e-08, "logits/generated": 1.9441397190093994, "logits/oppo_generated": -2.8127946853637695, "logits/oppo_real": -2.8368353843688965, "logits/real": -1.9017053842544556, "logps/generated": -745.4407958984375, "logps/oppo_gen": -76.67695617675781, "logps/oppo_real": -326.9615783691406, "logps/real": -336.90020751953125, "loss": 0.3262, "loss/gen": 0.009169317781925201, "loss/real": 0.3471587896347046, "rewards/accuracies": 1.0, "rewards/generated": -668.7637939453125, "rewards/margins": 658.8252563476562, "rewards/real": -9.938613891601562, "step": 353 }, { "epoch": 0.74, "grad_norm": 28.62276106828052, "learning_rate": 9.57649880684859e-08, "logits/generated": 0.5683245658874512, "logits/oppo_generated": -2.7986297607421875, "logits/oppo_real": -2.700263500213623, "logits/real": -1.9732091426849365, "logps/generated": -679.8853759765625, "logps/oppo_gen": -71.45687103271484, "logps/oppo_real": -221.93785095214844, "logps/real": -217.52499389648438, "loss": 0.2688, "loss/gen": 0.09031351655721664, "loss/real": 0.2791171371936798, "rewards/accuracies": 1.0, "rewards/generated": -608.4285278320312, "rewards/margins": 612.84130859375, "rewards/real": 4.412837982177734, "step": 354 }, { "epoch": 0.74, "grad_norm": 35.88830872477833, "learning_rate": 9.433163739145771e-08, "logits/generated": -0.6793491244316101, "logits/oppo_generated": -2.8078927993774414, "logits/oppo_real": -2.8096060752868652, "logits/real": -2.194153308868408, "logps/generated": -783.4717407226562, "logps/oppo_gen": -86.69573974609375, "logps/oppo_real": -399.6392822265625, "logps/real": -386.584716796875, "loss": 0.2717, "loss/gen": 0.0438024066388607, "loss/real": 0.23390790820121765, "rewards/accuracies": 1.0, "rewards/generated": -696.7760009765625, "rewards/margins": 709.83056640625, "rewards/real": 13.0545654296875, "step": 355 }, { "epoch": 0.74, "grad_norm": 38.479150452306634, "learning_rate": 9.290659595360017e-08, "logits/generated": 0.6003881692886353, "logits/oppo_generated": -2.6745800971984863, "logits/oppo_real": -2.8772976398468018, "logits/real": -1.8411824703216553, "logps/generated": -703.7737426757812, "logps/oppo_gen": -74.71858215332031, "logps/oppo_real": -277.74951171875, "logps/real": -292.45513916015625, "loss": 0.386, "loss/gen": 0.03084971383213997, "loss/real": 0.34726041555404663, "rewards/accuracies": 1.0, "rewards/generated": -629.05517578125, "rewards/margins": 614.3494873046875, "rewards/real": -14.705662727355957, "step": 356 }, { "epoch": 0.75, "grad_norm": 20.115341996659616, "learning_rate": 9.148993982053058e-08, "logits/generated": 0.06593459844589233, "logits/oppo_generated": -2.825925350189209, "logits/oppo_real": -2.854370594024658, "logits/real": -1.9273500442504883, "logps/generated": -762.333251953125, "logps/oppo_gen": -91.21415710449219, "logps/oppo_real": -272.8486328125, "logps/real": -282.2558898925781, "loss": 0.274, "loss/gen": 0.03180719166994095, "loss/real": 0.26319044828414917, "rewards/accuracies": 1.0, "rewards/generated": -671.1190795898438, "rewards/margins": 661.7117919921875, "rewards/real": -9.407278060913086, "step": 357 }, { "epoch": 0.75, "grad_norm": 29.17447660968777, "learning_rate": 9.008174461027723e-08, "logits/generated": 0.7246966361999512, "logits/oppo_generated": -2.6950693130493164, "logits/oppo_real": -2.750147819519043, "logits/real": -2.081655979156494, "logps/generated": -689.679443359375, "logps/oppo_gen": -83.51091003417969, "logps/oppo_real": -333.22991943359375, "logps/real": -327.7589416503906, "loss": 0.2634, "loss/gen": 0.07645511627197266, "loss/real": 0.25844529271125793, "rewards/accuracies": 0.9375, "rewards/generated": -606.1685791015625, "rewards/margins": 611.6395263671875, "rewards/real": 5.470993995666504, "step": 358 }, { "epoch": 0.75, "grad_norm": 24.78207384546401, "learning_rate": 8.868208548924253e-08, "logits/generated": 0.16621741652488708, "logits/oppo_generated": -2.6902928352355957, "logits/oppo_real": -2.733646869659424, "logits/real": -2.0168347358703613, "logps/generated": -673.65087890625, "logps/oppo_gen": -75.88461303710938, "logps/oppo_real": -268.97918701171875, "logps/real": -247.7440643310547, "loss": 0.2872, "loss/gen": 0.03719499707221985, "loss/real": 0.188075989484787, "rewards/accuracies": 1.0, "rewards/generated": -597.7662353515625, "rewards/margins": 619.0014038085938, "rewards/real": 21.235126495361328, "step": 359 }, { "epoch": 0.75, "grad_norm": 34.72498853997741, "learning_rate": 8.729103716819111e-08, "logits/generated": 0.26208943128585815, "logits/oppo_generated": -2.67659068107605, "logits/oppo_real": -2.6717331409454346, "logits/real": -2.199741840362549, "logps/generated": -711.75146484375, "logps/oppo_gen": -72.04103088378906, "logps/oppo_real": -243.22787475585938, "logps/real": -215.61843872070312, "loss": 0.3307, "loss/gen": 0.23388239741325378, "loss/real": 0.15823516249656677, "rewards/accuracies": 1.0, "rewards/generated": -639.71044921875, "rewards/margins": 667.31982421875, "rewards/real": 27.609418869018555, "step": 360 }, { "epoch": 0.76, "grad_norm": 26.014443713796403, "learning_rate": 8.590867389826179e-08, "logits/generated": -0.7573856115341187, "logits/oppo_generated": -2.835716962814331, "logits/oppo_real": -2.741757392883301, "logits/real": -2.0698163509368896, "logps/generated": -635.7809448242188, "logps/oppo_gen": -69.39401245117188, "logps/oppo_real": -260.70556640625, "logps/real": -233.06491088867188, "loss": 0.2933, "loss/gen": 0.20804080367088318, "loss/real": 0.15793883800506592, "rewards/accuracies": 1.0, "rewards/generated": -566.3869018554688, "rewards/margins": 594.0275268554688, "rewards/real": 27.640644073486328, "step": 361 }, { "epoch": 0.76, "grad_norm": 34.53451094613999, "learning_rate": 8.453506946700417e-08, "logits/generated": 0.09254428744316101, "logits/oppo_generated": -2.665210008621216, "logits/oppo_real": -2.8125662803649902, "logits/real": -1.8576146364212036, "logps/generated": -872.798828125, "logps/oppo_gen": -100.13014221191406, "logps/oppo_real": -193.47467041015625, "logps/real": -196.80953979492188, "loss": 0.2903, "loss/gen": 0.019167516380548477, "loss/real": 0.2566121816635132, "rewards/accuracies": 1.0, "rewards/generated": -772.6685791015625, "rewards/margins": 769.333740234375, "rewards/real": -3.3348846435546875, "step": 362 }, { "epoch": 0.76, "grad_norm": 61.85891126057547, "learning_rate": 8.317029719444016e-08, "logits/generated": 0.3024826645851135, "logits/oppo_generated": -2.765684127807617, "logits/oppo_real": -2.8010177612304688, "logits/real": -2.0826213359832764, "logps/generated": -720.9824829101562, "logps/oppo_gen": -92.44059753417969, "logps/oppo_real": -245.97979736328125, "logps/real": -244.0174560546875, "loss": 0.3713, "loss/gen": 0.07919669896364212, "loss/real": 0.22272387146949768, "rewards/accuracies": 1.0, "rewards/generated": -628.5418701171875, "rewards/margins": 630.5042724609375, "rewards/real": 1.962360143661499, "step": 363 }, { "epoch": 0.76, "grad_norm": 39.76462271575782, "learning_rate": 8.181442992915e-08, "logits/generated": 0.19746087491512299, "logits/oppo_generated": -2.796973943710327, "logits/oppo_real": -2.726362705230713, "logits/real": -1.9224494695663452, "logps/generated": -806.6083984375, "logps/oppo_gen": -80.83071899414062, "logps/oppo_real": -287.78509521484375, "logps/real": -312.4919738769531, "loss": 0.315, "loss/gen": 0.024429194629192352, "loss/real": 0.4663940668106079, "rewards/accuracies": 0.9375, "rewards/generated": -725.7777099609375, "rewards/margins": 701.0706787109375, "rewards/real": -24.706878662109375, "step": 364 }, { "epoch": 0.76, "grad_norm": 47.83683510610573, "learning_rate": 8.046754004438428e-08, "logits/generated": 0.40333959460258484, "logits/oppo_generated": -2.7642383575439453, "logits/oppo_real": -2.565757989883423, "logits/real": -2.1065196990966797, "logps/generated": -664.4947509765625, "logps/oppo_gen": -78.50663757324219, "logps/oppo_real": -220.23593139648438, "logps/real": -186.36785888671875, "loss": 0.2479, "loss/gen": 0.07458934187889099, "loss/real": 0.15034610033035278, "rewards/accuracies": 1.0, "rewards/generated": -585.9880981445312, "rewards/margins": 619.856201171875, "rewards/real": 33.868080139160156, "step": 365 }, { "epoch": 0.77, "grad_norm": 36.08634934951165, "learning_rate": 7.912969943420017e-08, "logits/generated": -0.5886929631233215, "logits/oppo_generated": -2.8737826347351074, "logits/oppo_real": -2.7705636024475098, "logits/real": -1.9518818855285645, "logps/generated": -682.5374755859375, "logps/oppo_gen": -81.06214904785156, "logps/oppo_real": -277.17974853515625, "logps/real": -259.0784912109375, "loss": 0.2526, "loss/gen": 0.13476580381393433, "loss/real": 0.1966303288936615, "rewards/accuracies": 1.0, "rewards/generated": -601.475341796875, "rewards/margins": 619.57666015625, "rewards/real": 18.10125732421875, "step": 366 }, { "epoch": 0.77, "grad_norm": 52.871022031951725, "learning_rate": 7.780097950962447e-08, "logits/generated": 0.373833030462265, "logits/oppo_generated": -2.6847777366638184, "logits/oppo_real": -2.5956692695617676, "logits/real": -2.0350966453552246, "logps/generated": -751.0484619140625, "logps/oppo_gen": -76.97508239746094, "logps/oppo_real": -226.62579345703125, "logps/real": -217.1838836669922, "loss": 0.3235, "loss/gen": 0.02066943421959877, "loss/real": 0.22452738881111145, "rewards/accuracies": 1.0, "rewards/generated": -674.0733642578125, "rewards/margins": 683.515380859375, "rewards/real": 9.441933631896973, "step": 367 }, { "epoch": 0.77, "grad_norm": 20.391625484315323, "learning_rate": 7.648145119484151e-08, "logits/generated": 0.03344006836414337, "logits/oppo_generated": -2.886763095855713, "logits/oppo_real": -2.899338960647583, "logits/real": -2.359893560409546, "logps/generated": -1293.310546875, "logps/oppo_gen": -96.46270751953125, "logps/oppo_real": -380.9791259765625, "logps/real": -356.67138671875, "loss": 0.2828, "loss/gen": 0.015580754727125168, "loss/real": 0.1647091805934906, "rewards/accuracies": 1.0, "rewards/generated": -1196.847900390625, "rewards/margins": 1221.1556396484375, "rewards/real": 24.307750701904297, "step": 368 }, { "epoch": 0.77, "grad_norm": 32.37493967175846, "learning_rate": 7.517118492340748e-08, "logits/generated": 1.0505952835083008, "logits/oppo_generated": -2.832742691040039, "logits/oppo_real": -2.962982654571533, "logits/real": -1.8713144063949585, "logps/generated": -675.9676513671875, "logps/oppo_gen": -68.11212158203125, "logps/oppo_real": -284.201904296875, "logps/real": -310.61572265625, "loss": 0.2927, "loss/gen": 0.026173148304224014, "loss/real": 0.45966047048568726, "rewards/accuracies": 1.0, "rewards/generated": -607.85546875, "rewards/margins": 581.4417114257812, "rewards/real": -26.413818359375, "step": 369 }, { "epoch": 0.77, "grad_norm": 17.18461090775061, "learning_rate": 7.387025063449081e-08, "logits/generated": 1.1518608331680298, "logits/oppo_generated": -2.6026477813720703, "logits/oppo_real": -2.6428492069244385, "logits/real": -1.8430811166763306, "logps/generated": -653.1893310546875, "logps/oppo_gen": -60.00908660888672, "logps/oppo_real": -280.5744323730469, "logps/real": -255.991455078125, "loss": 0.2254, "loss/gen": 0.09616182744503021, "loss/real": 0.16350211203098297, "rewards/accuracies": 1.0, "rewards/generated": -593.1802978515625, "rewards/margins": 617.7632446289062, "rewards/real": 24.582962036132812, "step": 370 }, { "epoch": 0.78, "grad_norm": 22.43516388104042, "learning_rate": 7.257871776913879e-08, "logits/generated": -0.040337368845939636, "logits/oppo_generated": -2.7108025550842285, "logits/oppo_real": -2.8590545654296875, "logits/real": -2.0019431114196777, "logps/generated": -726.527099609375, "logps/oppo_gen": -90.550537109375, "logps/oppo_real": -352.6172790527344, "logps/real": -318.4084167480469, "loss": 0.2788, "loss/gen": 0.053786490112543106, "loss/real": 0.14998918771743774, "rewards/accuracies": 1.0, "rewards/generated": -635.9765625, "rewards/margins": 670.1854248046875, "rewards/real": 34.2088623046875, "step": 371 }, { "epoch": 0.78, "grad_norm": 37.02779519749319, "learning_rate": 7.129665526657145e-08, "logits/generated": 0.5808796882629395, "logits/oppo_generated": -2.8980014324188232, "logits/oppo_real": -2.8099284172058105, "logits/real": -2.2642743587493896, "logps/generated": -742.5794067382812, "logps/oppo_gen": -78.37774658203125, "logps/oppo_real": -325.51953125, "logps/real": -323.89434814453125, "loss": 0.2738, "loss/gen": 0.04419238120317459, "loss/real": 0.2435626983642578, "rewards/accuracies": 1.0, "rewards/generated": -664.20166015625, "rewards/margins": 665.826904296875, "rewards/real": 1.6252069473266602, "step": 372 }, { "epoch": 0.78, "grad_norm": 22.601374140461225, "learning_rate": 7.002413156050108e-08, "logits/generated": 0.8091610670089722, "logits/oppo_generated": -2.7554728984832764, "logits/oppo_real": -2.894706964492798, "logits/real": -2.002586841583252, "logps/generated": -853.19775390625, "logps/oppo_gen": -74.49253845214844, "logps/oppo_real": -317.25726318359375, "logps/real": -293.00537109375, "loss": 0.3157, "loss/gen": 0.011781592853367329, "loss/real": 0.16653022170066833, "rewards/accuracies": 1.0, "rewards/generated": -778.7052612304688, "rewards/margins": 802.9571533203125, "rewards/real": 24.25188636779785, "step": 373 }, { "epoch": 0.78, "grad_norm": 45.91711654488852, "learning_rate": 6.876121457547995e-08, "logits/generated": 0.8897229433059692, "logits/oppo_generated": -2.9072961807250977, "logits/oppo_real": -2.758073329925537, "logits/real": -2.256460189819336, "logps/generated": -862.1790161132812, "logps/oppo_gen": -78.95392608642578, "logps/oppo_real": -264.9425964355469, "logps/real": -287.25244140625, "loss": 0.2942, "loss/gen": 0.04109443724155426, "loss/real": 0.34802842140197754, "rewards/accuracies": 1.0, "rewards/generated": -783.22509765625, "rewards/margins": 760.915283203125, "rewards/real": -22.30983543395996, "step": 374 }, { "epoch": 0.78, "grad_norm": 44.29882601523572, "learning_rate": 6.75079717232744e-08, "logits/generated": -0.22042664885520935, "logits/oppo_generated": -2.6750731468200684, "logits/oppo_real": -2.583487033843994, "logits/real": -1.7571690082550049, "logps/generated": -773.0181884765625, "logps/oppo_gen": -91.54539489746094, "logps/oppo_real": -271.75091552734375, "logps/real": -281.9573974609375, "loss": 0.2854, "loss/gen": 0.1139841377735138, "loss/real": 0.32424396276474, "rewards/accuracies": 0.9375, "rewards/generated": -681.4728393554688, "rewards/margins": 671.2662963867188, "rewards/real": -10.206502914428711, "step": 375 }, { "epoch": 0.79, "grad_norm": 40.28310459268253, "learning_rate": 6.626446989926652e-08, "logits/generated": 0.7616984844207764, "logits/oppo_generated": -2.8004720211029053, "logits/oppo_real": -2.6996493339538574, "logits/real": -1.916635513305664, "logps/generated": -710.7991943359375, "logps/oppo_gen": -80.80632019042969, "logps/oppo_real": -282.40545654296875, "logps/real": -315.500244140625, "loss": 0.3345, "loss/gen": 0.052083853632211685, "loss/real": 0.46380823850631714, "rewards/accuracies": 1.0, "rewards/generated": -629.992919921875, "rewards/margins": 596.8980712890625, "rewards/real": -33.09479522705078, "step": 376 }, { "epoch": 0.79, "grad_norm": 38.0174151217447, "learning_rate": 6.503077547888352e-08, "logits/generated": 0.3516882359981537, "logits/oppo_generated": -2.7037720680236816, "logits/oppo_real": -2.8666648864746094, "logits/real": -1.8362528085708618, "logps/generated": -685.1957397460938, "logps/oppo_gen": -63.57231140136719, "logps/oppo_real": -290.30877685546875, "logps/real": -291.5946044921875, "loss": 0.3308, "loss/gen": 0.07059072703123093, "loss/real": 0.27046704292297363, "rewards/accuracies": 1.0, "rewards/generated": -621.6234130859375, "rewards/margins": 620.337646484375, "rewards/real": -1.2858242988586426, "step": 377 }, { "epoch": 0.79, "grad_norm": 24.148170847363108, "learning_rate": 6.380695431405453e-08, "logits/generated": 0.8600256443023682, "logits/oppo_generated": -2.863770008087158, "logits/oppo_real": -2.788264751434326, "logits/real": -2.4320356845855713, "logps/generated": -767.5900268554688, "logps/oppo_gen": -78.98788452148438, "logps/oppo_real": -346.01312255859375, "logps/real": -334.597412109375, "loss": 0.2895, "loss/gen": 0.0075337570160627365, "loss/real": 0.24575293064117432, "rewards/accuracies": 1.0, "rewards/generated": -688.6021728515625, "rewards/margins": 700.0179443359375, "rewards/real": 11.415748596191406, "step": 378 }, { "epoch": 0.79, "grad_norm": 19.998951693717753, "learning_rate": 6.259307172969606e-08, "logits/generated": 0.8398880958557129, "logits/oppo_generated": -2.7056989669799805, "logits/oppo_real": -2.744664192199707, "logits/real": -1.7210136651992798, "logps/generated": -686.8607177734375, "logps/oppo_gen": -70.72666931152344, "logps/oppo_real": -221.324951171875, "logps/real": -223.03982543945312, "loss": 0.2638, "loss/gen": 0.017973629757761955, "loss/real": 0.3097477853298187, "rewards/accuracies": 1.0, "rewards/generated": -616.1341552734375, "rewards/margins": 614.4192504882812, "rewards/real": -1.7148818969726562, "step": 379 }, { "epoch": 0.79, "grad_norm": 29.975495566756795, "learning_rate": 6.138919252022435e-08, "logits/generated": -0.37420105934143066, "logits/oppo_generated": -2.858649492263794, "logits/oppo_real": -2.756929874420166, "logits/real": -2.3294174671173096, "logps/generated": -824.7210693359375, "logps/oppo_gen": -85.56549072265625, "logps/oppo_real": -359.80169677734375, "logps/real": -327.5230712890625, "loss": 0.2376, "loss/gen": 0.030374273657798767, "loss/real": 0.15413853526115417, "rewards/accuracies": 1.0, "rewards/generated": -739.1556396484375, "rewards/margins": 771.4342041015625, "rewards/real": 32.27858352661133, "step": 380 }, { "epoch": 0.8, "grad_norm": 28.702696521366356, "learning_rate": 6.019538094609759e-08, "logits/generated": 0.43896496295928955, "logits/oppo_generated": -2.8645360469818115, "logits/oppo_real": -2.8193347454071045, "logits/real": -2.3504862785339355, "logps/generated": -752.4977416992188, "logps/oppo_gen": -87.71719360351562, "logps/oppo_real": -305.0334167480469, "logps/real": -288.9637756347656, "loss": 0.2437, "loss/gen": 0.06234963238239288, "loss/real": 0.18159055709838867, "rewards/accuracies": 1.0, "rewards/generated": -664.780517578125, "rewards/margins": 680.8502197265625, "rewards/real": 16.069637298583984, "step": 381 }, { "epoch": 0.8, "grad_norm": 41.82788861065067, "learning_rate": 5.9011700730385224e-08, "logits/generated": 0.6830179691314697, "logits/oppo_generated": -2.926161766052246, "logits/oppo_real": -2.8411059379577637, "logits/real": -2.332524299621582, "logps/generated": -651.4114990234375, "logps/oppo_gen": -72.51399993896484, "logps/oppo_real": -324.98846435546875, "logps/real": -315.5436706542969, "loss": 0.3115, "loss/gen": 0.13546037673950195, "loss/real": 0.22851350903511047, "rewards/accuracies": 1.0, "rewards/generated": -578.8975830078125, "rewards/margins": 588.3423461914062, "rewards/real": 9.444814682006836, "step": 382 }, { "epoch": 0.8, "grad_norm": 23.53387866086291, "learning_rate": 5.7838215055366954e-08, "logits/generated": 0.6993808746337891, "logits/oppo_generated": -2.8700051307678223, "logits/oppo_real": -2.7701120376586914, "logits/real": -2.3738186359405518, "logps/generated": -675.7946166992188, "logps/oppo_gen": -63.61039352416992, "logps/oppo_real": -258.7322998046875, "logps/real": -240.4178466796875, "loss": 0.2715, "loss/gen": 0.0490497350692749, "loss/real": 0.17794281244277954, "rewards/accuracies": 1.0, "rewards/generated": -612.1842651367188, "rewards/margins": 630.4986572265625, "rewards/real": 18.31442642211914, "step": 383 }, { "epoch": 0.8, "grad_norm": 27.44267929653838, "learning_rate": 5.6674986559160004e-08, "logits/generated": 1.583488941192627, "logits/oppo_generated": -2.539917469024658, "logits/oppo_real": -2.5254969596862793, "logits/real": -1.7350016832351685, "logps/generated": -687.21630859375, "logps/oppo_gen": -51.28068923950195, "logps/oppo_real": -226.71041870117188, "logps/real": -194.57864379882812, "loss": 0.2423, "loss/gen": 0.019552189856767654, "loss/real": 0.16991135478019714, "rewards/accuracies": 1.0, "rewards/generated": -635.935546875, "rewards/margins": 668.0673828125, "rewards/real": 32.13178634643555, "step": 384 }, { "epoch": 0.81, "grad_norm": 23.579331272986728, "learning_rate": 5.552207733237543e-08, "logits/generated": 0.33335748314857483, "logits/oppo_generated": -2.7529163360595703, "logits/oppo_real": -2.814352512359619, "logits/real": -1.8816819190979004, "logps/generated": -772.8755493164062, "logps/oppo_gen": -78.71615600585938, "logps/oppo_real": -282.2195129394531, "logps/real": -305.6546936035156, "loss": 0.2747, "loss/gen": 0.0355767123401165, "loss/real": 0.39402052760124207, "rewards/accuracies": 0.9375, "rewards/generated": -694.159423828125, "rewards/margins": 670.7242431640625, "rewards/real": -23.4351806640625, "step": 385 }, { "epoch": 0.81, "grad_norm": 25.897959083062293, "learning_rate": 5.4379548914804427e-08, "logits/generated": -1.016433835029602, "logits/oppo_generated": -2.8893675804138184, "logits/oppo_real": -2.8325114250183105, "logits/real": -2.454998016357422, "logps/generated": -944.657470703125, "logps/oppo_gen": -82.72488403320312, "logps/oppo_real": -338.88629150390625, "logps/real": -304.11285400390625, "loss": 0.3319, "loss/gen": 0.010652797296643257, "loss/real": 0.14876721799373627, "rewards/accuracies": 1.0, "rewards/generated": -861.9326171875, "rewards/margins": 896.7060546875, "rewards/real": 34.7734375, "step": 386 }, { "epoch": 0.81, "grad_norm": 32.422641180706, "learning_rate": 5.324746229213281e-08, "logits/generated": -0.0454447939991951, "logits/oppo_generated": -2.660921573638916, "logits/oppo_real": -2.8100593090057373, "logits/real": -1.9440929889678955, "logps/generated": -732.8896484375, "logps/oppo_gen": -74.4239730834961, "logps/oppo_real": -280.35174560546875, "logps/real": -247.58436584472656, "loss": 0.3088, "loss/gen": 0.048400383442640305, "loss/real": 0.1509433090686798, "rewards/accuracies": 1.0, "rewards/generated": -658.4656982421875, "rewards/margins": 691.2330932617188, "rewards/real": 32.76738739013672, "step": 387 }, { "epoch": 0.81, "grad_norm": 33.62113346147591, "learning_rate": 5.212587789268649e-08, "logits/generated": 0.09888418763875961, "logits/oppo_generated": -2.5671732425689697, "logits/oppo_real": -2.823955535888672, "logits/real": -1.5941078662872314, "logps/generated": -636.989013671875, "logps/oppo_gen": -59.2068977355957, "logps/oppo_real": -214.0272674560547, "logps/real": -246.39768981933594, "loss": 0.3632, "loss/gen": 0.1291210651397705, "loss/real": 0.4964044690132141, "rewards/accuracies": 1.0, "rewards/generated": -577.7821044921875, "rewards/margins": 545.41162109375, "rewards/real": -32.37043380737305, "step": 388 }, { "epoch": 0.81, "grad_norm": 34.79504548681734, "learning_rate": 5.101485558420504e-08, "logits/generated": -0.21139076352119446, "logits/oppo_generated": -2.7977442741394043, "logits/oppo_real": -2.9435877799987793, "logits/real": -2.2349720001220703, "logps/generated": -750.2471923828125, "logps/oppo_gen": -87.367431640625, "logps/oppo_real": -360.75341796875, "logps/real": -329.6597900390625, "loss": 0.241, "loss/gen": 0.057427436113357544, "loss/real": 0.15328460931777954, "rewards/accuracies": 1.0, "rewards/generated": -662.8797607421875, "rewards/margins": 693.9732666015625, "rewards/real": 31.093629837036133, "step": 389 }, { "epoch": 0.82, "grad_norm": 30.21505762084941, "learning_rate": 4.991445467064689e-08, "logits/generated": -0.5020634531974792, "logits/oppo_generated": -2.736494302749634, "logits/oppo_real": -2.5568008422851562, "logits/real": -2.028607130050659, "logps/generated": -551.84033203125, "logps/oppo_gen": -63.113067626953125, "logps/oppo_real": -307.4358215332031, "logps/real": -276.339599609375, "loss": 0.3518, "loss/gen": 0.1953495442867279, "loss/real": 0.1559717357158661, "rewards/accuracies": 1.0, "rewards/generated": -488.72723388671875, "rewards/margins": 519.8234252929688, "rewards/real": 31.096202850341797, "step": 390 }, { "epoch": 0.82, "grad_norm": 34.30040556998743, "learning_rate": 4.882473388902322e-08, "logits/generated": -0.31282973289489746, "logits/oppo_generated": -2.565460681915283, "logits/oppo_real": -2.567373037338257, "logits/real": -2.015244245529175, "logps/generated": -674.6446533203125, "logps/oppo_gen": -81.51359558105469, "logps/oppo_real": -271.79815673828125, "logps/real": -240.47508239746094, "loss": 0.3096, "loss/gen": 0.14029625058174133, "loss/real": 0.15811359882354736, "rewards/accuracies": 1.0, "rewards/generated": -593.1309814453125, "rewards/margins": 624.4541015625, "rewards/real": 31.323078155517578, "step": 391 }, { "epoch": 0.82, "grad_norm": 28.188018591058217, "learning_rate": 4.774575140626316e-08, "logits/generated": -0.5394298434257507, "logits/oppo_generated": -2.7671310901641846, "logits/oppo_real": -2.7871756553649902, "logits/real": -2.0843000411987305, "logps/generated": -701.097900390625, "logps/oppo_gen": -84.26641845703125, "logps/oppo_real": -395.81341552734375, "logps/real": -363.90277099609375, "loss": 0.349, "loss/gen": 0.09549596160650253, "loss/real": 0.1539105772972107, "rewards/accuracies": 1.0, "rewards/generated": -616.8314819335938, "rewards/margins": 648.7420654296875, "rewards/real": 31.910619735717773, "step": 392 }, { "epoch": 0.82, "grad_norm": 58.53682026017364, "learning_rate": 4.667756481610866e-08, "logits/generated": -1.2799639701843262, "logits/oppo_generated": -2.7752625942230225, "logits/oppo_real": -2.965273380279541, "logits/real": -2.285590887069702, "logps/generated": -534.36328125, "logps/oppo_gen": -66.7477035522461, "logps/oppo_real": -274.6632385253906, "logps/real": -244.97430419921875, "loss": 0.3684, "loss/gen": 0.18041257560253143, "loss/real": 0.15445731580257416, "rewards/accuracies": 1.0, "rewards/generated": -467.61553955078125, "rewards/margins": 497.30450439453125, "rewards/real": 29.688926696777344, "step": 393 }, { "epoch": 0.82, "grad_norm": 19.140116383548488, "learning_rate": 4.562023113604041e-08, "logits/generated": 0.006332114338874817, "logits/oppo_generated": -2.8669140338897705, "logits/oppo_real": -2.829169750213623, "logits/real": -2.427414655685425, "logps/generated": -813.6133422851562, "logps/oppo_gen": -90.48013305664062, "logps/oppo_real": -305.9639587402344, "logps/real": -272.2070617675781, "loss": 0.2935, "loss/gen": 0.015020076185464859, "loss/real": 0.1492321491241455, "rewards/accuracies": 1.0, "rewards/generated": -723.13330078125, "rewards/margins": 756.89013671875, "rewards/real": 33.756927490234375, "step": 394 }, { "epoch": 0.83, "grad_norm": 23.55447761100517, "learning_rate": 4.4573806804234335e-08, "logits/generated": 0.7242439985275269, "logits/oppo_generated": -2.8029961585998535, "logits/oppo_real": -2.7388648986816406, "logits/real": -2.2442853450775146, "logps/generated": -746.589111328125, "logps/oppo_gen": -84.95040893554688, "logps/oppo_real": -303.9382629394531, "logps/real": -277.8614196777344, "loss": 0.2665, "loss/gen": 0.023716842755675316, "loss/real": 0.16393963992595673, "rewards/accuracies": 1.0, "rewards/generated": -661.638671875, "rewards/margins": 687.7155151367188, "rewards/real": 26.076847076416016, "step": 395 }, { "epoch": 0.83, "grad_norm": 23.563012884006895, "learning_rate": 4.3538347676548956e-08, "logits/generated": 0.9573155045509338, "logits/oppo_generated": -2.836289882659912, "logits/oppo_real": -2.856013298034668, "logits/real": -2.266578197479248, "logps/generated": -776.6182861328125, "logps/oppo_gen": -75.90487670898438, "logps/oppo_real": -270.5047607421875, "logps/real": -238.49551391601562, "loss": 0.2417, "loss/gen": 0.020842332392930984, "loss/real": 0.15183612704277039, "rewards/accuracies": 1.0, "rewards/generated": -700.71337890625, "rewards/margins": 732.72265625, "rewards/real": 32.009239196777344, "step": 396 }, { "epoch": 0.83, "grad_norm": 26.58242129811189, "learning_rate": 4.251390902354413e-08, "logits/generated": -0.4084343910217285, "logits/oppo_generated": -2.8244986534118652, "logits/oppo_real": -2.845081329345703, "logits/real": -2.4438085556030273, "logps/generated": -735.8739013671875, "logps/oppo_gen": -82.96200561523438, "logps/oppo_real": -377.468017578125, "logps/real": -351.4737548828125, "loss": 0.2848, "loss/gen": 0.046934593468904495, "loss/real": 0.16870608925819397, "rewards/accuracies": 1.0, "rewards/generated": -652.911865234375, "rewards/margins": 678.9061279296875, "rewards/real": 25.994253158569336, "step": 397 }, { "epoch": 0.83, "grad_norm": 33.93199447458751, "learning_rate": 4.1500545527530544e-08, "logits/generated": 0.4591218829154968, "logits/oppo_generated": -2.9145877361297607, "logits/oppo_real": -2.768162727355957, "logits/real": -2.4178929328918457, "logps/generated": -820.8162841796875, "logps/oppo_gen": -72.26435852050781, "logps/oppo_real": -264.8419494628906, "logps/real": -247.36727905273438, "loss": 0.2787, "loss/gen": 0.03440755978226662, "loss/real": 0.18177896738052368, "rewards/accuracies": 1.0, "rewards/generated": -748.5518798828125, "rewards/margins": 766.026611328125, "rewards/real": 17.47467803955078, "step": 398 }, { "epoch": 0.83, "grad_norm": 19.506923131258507, "learning_rate": 4.0498311279651196e-08, "logits/generated": 0.3644871413707733, "logits/oppo_generated": -2.825018882751465, "logits/oppo_real": -2.7147412300109863, "logits/real": -2.2287793159484863, "logps/generated": -812.976318359375, "logps/oppo_gen": -73.07958984375, "logps/oppo_real": -251.62904357910156, "logps/real": -227.3709716796875, "loss": 0.2056, "loss/gen": 0.03383062779903412, "loss/real": 0.16399678587913513, "rewards/accuracies": 1.0, "rewards/generated": -739.896728515625, "rewards/margins": 764.15478515625, "rewards/real": 24.258071899414062, "step": 399 }, { "epoch": 0.84, "grad_norm": 27.640475948227287, "learning_rate": 3.9507259776993954e-08, "logits/generated": -0.3294936716556549, "logits/oppo_generated": -2.8925132751464844, "logits/oppo_real": -3.0092806816101074, "logits/real": -2.379504919052124, "logps/generated": -743.8359985351562, "logps/oppo_gen": -92.540771484375, "logps/oppo_real": -473.1883239746094, "logps/real": -436.2169189453125, "loss": 0.2205, "loss/gen": 0.045243918895721436, "loss/real": 0.14624208211898804, "rewards/accuracies": 1.0, "rewards/generated": -651.2952880859375, "rewards/margins": 688.2666015625, "rewards/real": 36.97139358520508, "step": 400 }, { "epoch": 0.84, "grad_norm": 27.54912493470471, "learning_rate": 3.8527443919736006e-08, "logits/generated": 0.2186594009399414, "logits/oppo_generated": -2.6842846870422363, "logits/oppo_real": -2.769160270690918, "logits/real": -2.1254193782806396, "logps/generated": -810.92626953125, "logps/oppo_gen": -88.25856018066406, "logps/oppo_real": -359.1764831542969, "logps/real": -332.98193359375, "loss": 0.2985, "loss/gen": 0.015042455866932869, "loss/real": 0.16066262125968933, "rewards/accuracies": 1.0, "rewards/generated": -722.667724609375, "rewards/margins": 748.8622436523438, "rewards/real": 26.194541931152344, "step": 401 }, { "epoch": 0.84, "grad_norm": 27.868933047377663, "learning_rate": 3.755891600832026e-08, "logits/generated": 1.6786067485809326, "logits/oppo_generated": -2.787109851837158, "logits/oppo_real": -2.6736297607421875, "logits/real": -2.085148334503174, "logps/generated": -823.227783203125, "logps/oppo_gen": -83.42156982421875, "logps/oppo_real": -251.31224060058594, "logps/real": -221.16571044921875, "loss": 0.2847, "loss/gen": 0.04803692549467087, "loss/real": 0.15670424699783325, "rewards/accuracies": 1.0, "rewards/generated": -739.8062133789062, "rewards/margins": 769.9527587890625, "rewards/real": 30.146543502807617, "step": 402 }, { "epoch": 0.84, "grad_norm": 26.888394872297486, "learning_rate": 3.660172774066339e-08, "logits/generated": 0.3176983594894409, "logits/oppo_generated": -2.6812472343444824, "logits/oppo_real": -2.740905284881592, "logits/real": -1.7640082836151123, "logps/generated": -676.04443359375, "logps/oppo_gen": -76.80146789550781, "logps/oppo_real": -230.00216674804688, "logps/real": -231.5615997314453, "loss": 0.2695, "loss/gen": 0.1879962980747223, "loss/real": 0.293995201587677, "rewards/accuracies": 1.0, "rewards/generated": -599.2429809570312, "rewards/margins": 597.6835327148438, "rewards/real": -1.5594367980957031, "step": 403 }, { "epoch": 0.85, "grad_norm": 39.111272676692906, "learning_rate": 3.565593020939678e-08, "logits/generated": -0.10082553327083588, "logits/oppo_generated": -2.7819857597351074, "logits/oppo_real": -2.6031432151794434, "logits/real": -2.1621930599212646, "logps/generated": -761.4735107421875, "logps/oppo_gen": -71.76890563964844, "logps/oppo_real": -343.7472229003906, "logps/real": -331.093994140625, "loss": 0.3908, "loss/gen": 0.10773513466119766, "loss/real": 0.2386000156402588, "rewards/accuracies": 1.0, "rewards/generated": -689.70458984375, "rewards/margins": 702.3577880859375, "rewards/real": 12.653242111206055, "step": 404 }, { "epoch": 0.85, "grad_norm": 24.426767943948295, "learning_rate": 3.472157389913874e-08, "logits/generated": -0.336029589176178, "logits/oppo_generated": -2.587843894958496, "logits/oppo_real": -2.5900180339813232, "logits/real": -1.9749070405960083, "logps/generated": -670.703857421875, "logps/oppo_gen": -80.07936096191406, "logps/oppo_real": -271.55816650390625, "logps/real": -249.90403747558594, "loss": 0.2879, "loss/gen": 0.2505200207233429, "loss/real": 0.16969501972198486, "rewards/accuracies": 1.0, "rewards/generated": -590.62451171875, "rewards/margins": 612.2786254882812, "rewards/real": 21.654117584228516, "step": 405 }, { "epoch": 0.85, "grad_norm": 28.34683887007123, "learning_rate": 3.3798708683800305e-08, "logits/generated": -0.4590807259082794, "logits/oppo_generated": -2.864154815673828, "logits/oppo_real": -2.9096922874450684, "logits/real": -2.42179536819458, "logps/generated": -784.5616455078125, "logps/oppo_gen": -83.42448425292969, "logps/oppo_real": -366.8133850097656, "logps/real": -336.69012451171875, "loss": 0.3182, "loss/gen": 0.04298119992017746, "loss/real": 0.15586364269256592, "rewards/accuracies": 1.0, "rewards/generated": -701.13720703125, "rewards/margins": 731.260498046875, "rewards/real": 30.12326431274414, "step": 406 }, { "epoch": 0.85, "grad_norm": 30.114497083137756, "learning_rate": 3.288738382392273e-08, "logits/generated": -0.08086250722408295, "logits/oppo_generated": -2.6285908222198486, "logits/oppo_real": -2.733128070831299, "logits/real": -1.9479438066482544, "logps/generated": -670.986328125, "logps/oppo_gen": -78.88270568847656, "logps/oppo_real": -345.356201171875, "logps/real": -317.0779113769531, "loss": 0.2838, "loss/gen": 0.07308726012706757, "loss/real": 0.16547003388404846, "rewards/accuracies": 1.0, "rewards/generated": -592.1036987304688, "rewards/margins": 620.3819580078125, "rewards/real": 28.278263092041016, "step": 407 }, { "epoch": 0.85, "grad_norm": 25.61442593379693, "learning_rate": 3.198764796404807e-08, "logits/generated": -0.07485093921422958, "logits/oppo_generated": -2.663846015930176, "logits/oppo_real": -2.728782892227173, "logits/real": -1.9464470148086548, "logps/generated": -773.8787841796875, "logps/oppo_gen": -71.8175048828125, "logps/oppo_real": -237.12289428710938, "logps/real": -206.4139404296875, "loss": 0.3017, "loss/gen": 0.023999135941267014, "loss/real": 0.15480223298072815, "rewards/accuracies": 1.0, "rewards/generated": -702.0612182617188, "rewards/margins": 732.7701416015625, "rewards/real": 30.708965301513672, "step": 408 }, { "epoch": 0.86, "grad_norm": 47.07785533444174, "learning_rate": 3.109954913012294e-08, "logits/generated": 1.2388019561767578, "logits/oppo_generated": -2.6548547744750977, "logits/oppo_real": -2.708220958709717, "logits/real": -2.0130181312561035, "logps/generated": -757.4107666015625, "logps/oppo_gen": -63.25993728637695, "logps/oppo_real": -208.60333251953125, "logps/real": -193.34283447265625, "loss": 0.2663, "loss/gen": 0.05390400439500809, "loss/real": 0.18397364020347595, "rewards/accuracies": 1.0, "rewards/generated": -694.1507568359375, "rewards/margins": 709.4112548828125, "rewards/real": 15.260494232177734, "step": 409 }, { "epoch": 0.86, "grad_norm": 38.33413279708682, "learning_rate": 3.022313472693447e-08, "logits/generated": 0.11504703760147095, "logits/oppo_generated": -2.7640318870544434, "logits/oppo_real": -2.821716070175171, "logits/real": -1.8772614002227783, "logps/generated": -611.8921508789062, "logps/oppo_gen": -65.58993530273438, "logps/oppo_real": -218.73495483398438, "logps/real": -219.8480987548828, "loss": 0.3002, "loss/gen": 0.06124342978000641, "loss/real": 0.3138381838798523, "rewards/accuracies": 1.0, "rewards/generated": -546.30224609375, "rewards/margins": 545.1890869140625, "rewards/real": -1.113149642944336, "step": 410 }, { "epoch": 0.86, "grad_norm": 28.46425917344361, "learning_rate": 2.935845153558053e-08, "logits/generated": 0.9498586058616638, "logits/oppo_generated": -2.7485408782958984, "logits/oppo_real": -2.6167445182800293, "logits/real": -1.8449242115020752, "logps/generated": -763.2135620117188, "logps/oppo_gen": -85.22071838378906, "logps/oppo_real": -229.88229370117188, "logps/real": -216.39529418945312, "loss": 0.2803, "loss/gen": 0.04368508979678154, "loss/real": 0.18471089005470276, "rewards/accuracies": 1.0, "rewards/generated": -677.9928588867188, "rewards/margins": 691.4798583984375, "rewards/real": 13.486985206604004, "step": 411 }, { "epoch": 0.86, "grad_norm": 22.72136923948879, "learning_rate": 2.8505545710972107e-08, "logits/generated": 0.07348564267158508, "logits/oppo_generated": -2.971525192260742, "logits/oppo_real": -2.9833126068115234, "logits/real": -2.593665361404419, "logps/generated": -748.5177612304688, "logps/oppo_gen": -84.10810852050781, "logps/oppo_real": -449.6239929199219, "logps/real": -409.41595458984375, "loss": 0.2278, "loss/gen": 0.010234430432319641, "loss/real": 0.1431514173746109, "rewards/accuracies": 1.0, "rewards/generated": -664.40966796875, "rewards/margins": 704.61767578125, "rewards/real": 40.208045959472656, "step": 412 }, { "epoch": 0.86, "grad_norm": 32.74029257371565, "learning_rate": 2.766446277937029e-08, "logits/generated": -0.634722113609314, "logits/oppo_generated": -2.7652645111083984, "logits/oppo_real": -3.0290045738220215, "logits/real": -2.1673192977905273, "logps/generated": -838.4489135742188, "logps/oppo_gen": -95.71146392822266, "logps/oppo_real": -355.184326171875, "logps/real": -333.04266357421875, "loss": 0.2874, "loss/gen": 0.015061789192259312, "loss/real": 0.17296290397644043, "rewards/accuracies": 1.0, "rewards/generated": -742.7374267578125, "rewards/margins": 764.879150390625, "rewards/real": 22.141700744628906, "step": 413 }, { "epoch": 0.87, "grad_norm": 27.035180801625177, "learning_rate": 2.683524763595546e-08, "logits/generated": 0.41425225138664246, "logits/oppo_generated": -2.7904341220855713, "logits/oppo_real": -2.7785701751708984, "logits/real": -1.5771113634109497, "logps/generated": -848.3800659179688, "logps/oppo_gen": -72.34674072265625, "logps/oppo_real": -232.07763671875, "logps/real": -246.59060668945312, "loss": 0.3127, "loss/gen": 0.1244996190071106, "loss/real": 0.3740527629852295, "rewards/accuracies": 0.9375, "rewards/generated": -776.0333251953125, "rewards/margins": 761.5203857421875, "rewards/real": -14.51296615600586, "step": 414 }, { "epoch": 0.87, "grad_norm": 40.5459146080866, "learning_rate": 2.601794454243139e-08, "logits/generated": -0.259597510099411, "logits/oppo_generated": -2.692986011505127, "logits/oppo_real": -2.932211399078369, "logits/real": -2.1438608169555664, "logps/generated": -699.6242065429688, "logps/oppo_gen": -74.73764038085938, "logps/oppo_real": -333.96795654296875, "logps/real": -311.9378662109375, "loss": 0.296, "loss/gen": 0.0382215790450573, "loss/real": 0.1780180037021637, "rewards/accuracies": 1.0, "rewards/generated": -624.8865356445312, "rewards/margins": 646.9166259765625, "rewards/real": 22.03009796142578, "step": 415 }, { "epoch": 0.87, "grad_norm": 23.11180604395254, "learning_rate": 2.521259712466256e-08, "logits/generated": 0.33909279108047485, "logits/oppo_generated": -2.8526077270507812, "logits/oppo_real": -2.811613082885742, "logits/real": -2.142138957977295, "logps/generated": -759.60107421875, "logps/oppo_gen": -85.68292236328125, "logps/oppo_real": -291.8996276855469, "logps/real": -274.5881652832031, "loss": 0.281, "loss/gen": 0.12079505622386932, "loss/real": 0.1788100302219391, "rewards/accuracies": 1.0, "rewards/generated": -673.9180908203125, "rewards/margins": 691.2295532226562, "rewards/real": 17.31146240234375, "step": 416 }, { "epoch": 0.87, "grad_norm": 47.16826491237249, "learning_rate": 2.4419248370345285e-08, "logits/generated": 0.8615379333496094, "logits/oppo_generated": -2.636507034301758, "logits/oppo_real": -2.647146701812744, "logits/real": -1.938757300376892, "logps/generated": -693.5307006835938, "logps/oppo_gen": -58.54317092895508, "logps/oppo_real": -258.7288818359375, "logps/real": -235.12351989746094, "loss": 0.3143, "loss/gen": 0.04418793320655823, "loss/real": 0.16570395231246948, "rewards/accuracies": 1.0, "rewards/generated": -634.987548828125, "rewards/margins": 658.5928955078125, "rewards/real": 23.605348587036133, "step": 417 }, { "epoch": 0.87, "grad_norm": 33.410398991065456, "learning_rate": 2.3637940626713342e-08, "logits/generated": -0.08828268945217133, "logits/oppo_generated": -2.8972327709198, "logits/oppo_real": -2.817399024963379, "logits/real": -2.1558895111083984, "logps/generated": -801.6412963867188, "logps/oppo_gen": -77.41506958007812, "logps/oppo_real": -216.58090209960938, "logps/real": -202.15652465820312, "loss": 0.2671, "loss/gen": 0.014140581712126732, "loss/real": 0.18970459699630737, "rewards/accuracies": 1.0, "rewards/generated": -724.2261962890625, "rewards/margins": 738.650634765625, "rewards/real": 14.424371719360352, "step": 418 }, { "epoch": 0.88, "grad_norm": 26.887554979822333, "learning_rate": 2.2868715598277578e-08, "logits/generated": 0.38027942180633545, "logits/oppo_generated": -2.589816093444824, "logits/oppo_real": -2.513615608215332, "logits/real": -1.8797662258148193, "logps/generated": -772.2117309570312, "logps/oppo_gen": -65.45459747314453, "logps/oppo_real": -176.86581420898438, "logps/real": -155.86514282226562, "loss": 0.2818, "loss/gen": 0.017079707235097885, "loss/real": 0.18445941805839539, "rewards/accuracies": 1.0, "rewards/generated": -706.757080078125, "rewards/margins": 727.7578125, "rewards/real": 21.00067901611328, "step": 419 }, { "epoch": 0.88, "grad_norm": 30.502731451077665, "learning_rate": 2.2111614344599684e-08, "logits/generated": 0.21723803877830505, "logits/oppo_generated": -2.741001605987549, "logits/oppo_real": -2.9104204177856445, "logits/real": -2.0397191047668457, "logps/generated": -762.03857421875, "logps/oppo_gen": -73.04945373535156, "logps/oppo_real": -313.0050048828125, "logps/real": -289.78375244140625, "loss": 0.2727, "loss/gen": 0.055183976888656616, "loss/real": 0.16841815412044525, "rewards/accuracies": 1.0, "rewards/generated": -688.9891357421875, "rewards/margins": 712.21044921875, "rewards/real": 23.221250534057617, "step": 420 }, { "epoch": 0.88, "grad_norm": 52.73633561170135, "learning_rate": 2.1366677278100486e-08, "logits/generated": 1.2762212753295898, "logits/oppo_generated": -2.7252440452575684, "logits/oppo_real": -2.489119052886963, "logits/real": -1.8293414115905762, "logps/generated": -715.15185546875, "logps/oppo_gen": -75.96475982666016, "logps/oppo_real": -274.5854797363281, "logps/real": -288.38543701171875, "loss": 0.3453, "loss/gen": 0.0465887188911438, "loss/real": 0.39446061849594116, "rewards/accuracies": 1.0, "rewards/generated": -639.1871337890625, "rewards/margins": 625.38720703125, "rewards/real": -13.799976348876953, "step": 421 }, { "epoch": 0.88, "grad_norm": 31.28419125626542, "learning_rate": 2.0633944161903145e-08, "logits/generated": 0.22932855784893036, "logits/oppo_generated": -2.8465185165405273, "logits/oppo_real": -2.874579668045044, "logits/real": -2.474705457687378, "logps/generated": -794.6787719726562, "logps/oppo_gen": -77.42123413085938, "logps/oppo_real": -269.32452392578125, "logps/real": -232.96438598632812, "loss": 0.2468, "loss/gen": 0.01573217660188675, "loss/real": 0.14619815349578857, "rewards/accuracies": 1.0, "rewards/generated": -717.257568359375, "rewards/margins": 753.61767578125, "rewards/real": 36.360107421875, "step": 422 }, { "epoch": 0.88, "grad_norm": 35.38810007549324, "learning_rate": 1.991345410771017e-08, "logits/generated": 0.016978725790977478, "logits/oppo_generated": -2.8310189247131348, "logits/oppo_real": -2.79268741607666, "logits/real": -2.273961067199707, "logps/generated": -612.0408325195312, "logps/oppo_gen": -61.178497314453125, "logps/oppo_real": -251.74114990234375, "logps/real": -251.0238800048828, "loss": 0.2934, "loss/gen": 0.06265068799257278, "loss/real": 0.2755354940891266, "rewards/accuracies": 1.0, "rewards/generated": -550.8623657226562, "rewards/margins": 551.57958984375, "rewards/real": 0.7172629833221436, "step": 423 }, { "epoch": 0.89, "grad_norm": 32.6389252040652, "learning_rate": 1.9205245573716195e-08, "logits/generated": 0.4440228343009949, "logits/oppo_generated": -2.7672994136810303, "logits/oppo_real": -2.967806577682495, "logits/real": -2.1830549240112305, "logps/generated": -733.4920654296875, "logps/oppo_gen": -83.01123046875, "logps/oppo_real": -284.7894287109375, "logps/real": -254.63729858398438, "loss": 0.2998, "loss/gen": 0.04325643181800842, "loss/real": 0.16018053889274597, "rewards/accuracies": 1.0, "rewards/generated": -650.4808349609375, "rewards/margins": 680.6329956054688, "rewards/real": 30.152151107788086, "step": 424 }, { "epoch": 0.89, "grad_norm": 22.53736724560527, "learning_rate": 1.850935636255496e-08, "logits/generated": 0.13050776720046997, "logits/oppo_generated": -2.674236297607422, "logits/oppo_real": -2.7958316802978516, "logits/real": -2.067239761352539, "logps/generated": -669.11083984375, "logps/oppo_gen": -70.25758361816406, "logps/oppo_real": -324.11871337890625, "logps/real": -289.18328857421875, "loss": 0.276, "loss/gen": 0.06311353296041489, "loss/real": 0.14721855521202087, "rewards/accuracies": 1.0, "rewards/generated": -598.853271484375, "rewards/margins": 633.7886962890625, "rewards/real": 34.93544006347656, "step": 425 }, { "epoch": 0.89, "grad_norm": 37.272236376318865, "learning_rate": 1.7825823619281448e-08, "logits/generated": -0.16331154108047485, "logits/oppo_generated": -2.7839677333831787, "logits/oppo_real": -2.795403480529785, "logits/real": -1.8390673398971558, "logps/generated": -816.87646484375, "logps/oppo_gen": -85.01667785644531, "logps/oppo_real": -272.42120361328125, "logps/real": -262.7786560058594, "loss": 0.3006, "loss/gen": 0.015330037102103233, "loss/real": 0.2208605408668518, "rewards/accuracies": 1.0, "rewards/generated": -731.8598022460938, "rewards/margins": 741.5023193359375, "rewards/real": 9.64252758026123, "step": 426 }, { "epoch": 0.89, "grad_norm": 27.185687125262373, "learning_rate": 1.7154683829389283e-08, "logits/generated": 0.024800747632980347, "logits/oppo_generated": -2.7494869232177734, "logits/oppo_real": -2.8051180839538574, "logits/real": -2.1958751678466797, "logps/generated": -673.5935668945312, "logps/oppo_gen": -77.26094055175781, "logps/oppo_real": -330.8422546386719, "logps/real": -315.17755126953125, "loss": 0.2735, "loss/gen": 0.0413350835442543, "loss/real": 0.23769381642341614, "rewards/accuracies": 1.0, "rewards/generated": -596.3326416015625, "rewards/margins": 611.997314453125, "rewards/real": 15.664695739746094, "step": 427 }, { "epoch": 0.9, "grad_norm": 25.722542057928003, "learning_rate": 1.649597281686302e-08, "logits/generated": 1.0848944187164307, "logits/oppo_generated": -2.645679473876953, "logits/oppo_real": -2.5204734802246094, "logits/real": -1.728602409362793, "logps/generated": -722.6002197265625, "logps/oppo_gen": -55.84619903564453, "logps/oppo_real": -248.24684143066406, "logps/real": -255.62530517578125, "loss": 0.2611, "loss/gen": 0.11462613195180893, "loss/real": 0.43046581745147705, "rewards/accuracies": 1.0, "rewards/generated": -666.7540283203125, "rewards/margins": 659.3756103515625, "rewards/real": -7.378480911254883, "step": 428 }, { "epoch": 0.9, "grad_norm": 35.29333343233004, "learning_rate": 1.584972574226623e-08, "logits/generated": -0.342296838760376, "logits/oppo_generated": -2.5918760299682617, "logits/oppo_real": -2.8728556632995605, "logits/real": -1.588826298713684, "logps/generated": -894.3289794921875, "logps/oppo_gen": -103.07553100585938, "logps/oppo_real": -180.5751190185547, "logps/real": -166.38107299804688, "loss": 0.334, "loss/gen": 0.013701886869966984, "loss/real": 0.25664207339286804, "rewards/accuracies": 1.0, "rewards/generated": -791.25341796875, "rewards/margins": 805.4474487304688, "rewards/real": 14.194051742553711, "step": 429 }, { "epoch": 0.9, "grad_norm": 28.738920249539213, "learning_rate": 1.521597710086439e-08, "logits/generated": -0.3944983184337616, "logits/oppo_generated": -2.97127366065979, "logits/oppo_real": -2.8858768939971924, "logits/real": -2.4181745052337646, "logps/generated": -694.18017578125, "logps/oppo_gen": -90.01107788085938, "logps/oppo_real": -335.91571044921875, "logps/real": -312.7785339355469, "loss": 0.2704, "loss/gen": 0.10327526926994324, "loss/real": 0.1773584485054016, "rewards/accuracies": 1.0, "rewards/generated": -604.1690673828125, "rewards/margins": 627.3062133789062, "rewards/real": 23.137134552001953, "step": 430 }, { "epoch": 0.9, "grad_norm": 35.86587461521136, "learning_rate": 1.459476072078386e-08, "logits/generated": 0.22406895458698273, "logits/oppo_generated": -2.885314464569092, "logits/oppo_real": -2.864060163497925, "logits/real": -2.2564728260040283, "logps/generated": -758.191650390625, "logps/oppo_gen": -88.2413558959961, "logps/oppo_real": -357.7693176269531, "logps/real": -357.68109130859375, "loss": 0.3381, "loss/gen": 0.01729278452694416, "loss/real": 0.27993616461753845, "rewards/accuracies": 1.0, "rewards/generated": -669.9502563476562, "rewards/margins": 670.0384521484375, "rewards/real": 0.08818435668945312, "step": 431 }, { "epoch": 0.9, "grad_norm": 33.148304007803326, "learning_rate": 1.3986109761206093e-08, "logits/generated": -0.19652684032917023, "logits/oppo_generated": -2.7991626262664795, "logits/oppo_real": -2.64943265914917, "logits/real": -2.2657108306884766, "logps/generated": -740.3734741210938, "logps/oppo_gen": -112.3591079711914, "logps/oppo_real": -486.785400390625, "logps/real": -473.11456298828125, "loss": 0.3165, "loss/gen": 0.1387334167957306, "loss/real": 0.19511449337005615, "rewards/accuracies": 0.9375, "rewards/generated": -628.0143432617188, "rewards/margins": 641.6851806640625, "rewards/real": 13.670815467834473, "step": 432 }, { "epoch": 0.91, "grad_norm": 28.601751356587634, "learning_rate": 1.3390056710597647e-08, "logits/generated": -0.7066829204559326, "logits/oppo_generated": -2.7696914672851562, "logits/oppo_real": -2.9772748947143555, "logits/real": -2.063112735748291, "logps/generated": -639.30126953125, "logps/oppo_gen": -70.7381591796875, "logps/oppo_real": -258.11444091796875, "logps/real": -261.44818115234375, "loss": 0.3163, "loss/gen": 0.1252206563949585, "loss/real": 0.26253828406333923, "rewards/accuracies": 0.9375, "rewards/generated": -568.5631103515625, "rewards/margins": 565.2293701171875, "rewards/real": -3.333735227584839, "step": 433 }, { "epoch": 0.91, "grad_norm": 30.687004753513946, "learning_rate": 1.280663338497609e-08, "logits/generated": -0.32708024978637695, "logits/oppo_generated": -2.801250457763672, "logits/oppo_real": -2.8633522987365723, "logits/real": -2.3736109733581543, "logps/generated": -758.1578979492188, "logps/oppo_gen": -84.30010223388672, "logps/oppo_real": -478.0521545410156, "logps/real": -439.0254821777344, "loss": 0.3319, "loss/gen": 0.048829175531864166, "loss/real": 0.14225539565086365, "rewards/accuracies": 1.0, "rewards/generated": -673.8578491210938, "rewards/margins": 712.884521484375, "rewards/real": 39.02668762207031, "step": 434 }, { "epoch": 0.91, "grad_norm": 32.42948894030998, "learning_rate": 1.2235870926211616e-08, "logits/generated": -0.18947938084602356, "logits/oppo_generated": -2.709965467453003, "logits/oppo_real": -2.865321159362793, "logits/real": -1.8958816528320312, "logps/generated": -598.7344970703125, "logps/oppo_gen": -56.259098052978516, "logps/oppo_real": -212.92286682128906, "logps/real": -202.73770141601562, "loss": 0.2694, "loss/gen": 0.23359569907188416, "loss/real": 0.22692254185676575, "rewards/accuracies": 1.0, "rewards/generated": -542.475341796875, "rewards/margins": 552.6605224609375, "rewards/real": 10.185166358947754, "step": 435 }, { "epoch": 0.91, "grad_norm": 28.126094813760695, "learning_rate": 1.1677799800364957e-08, "logits/generated": -0.07881470024585724, "logits/oppo_generated": -2.677896499633789, "logits/oppo_real": -2.768991470336914, "logits/real": -2.2843446731567383, "logps/generated": -792.887939453125, "logps/oppo_gen": -83.10468292236328, "logps/oppo_real": -386.85968017578125, "logps/real": -352.83319091796875, "loss": 0.2329, "loss/gen": 0.0062789106741547585, "loss/real": 0.15011855959892273, "rewards/accuracies": 1.0, "rewards/generated": -709.7833251953125, "rewards/margins": 743.8096923828125, "rewards/real": 34.026451110839844, "step": 436 }, { "epoch": 0.91, "grad_norm": 27.066375525401092, "learning_rate": 1.1132449796060873e-08, "logits/generated": 0.4625094532966614, "logits/oppo_generated": -2.781583309173584, "logits/oppo_real": -2.9280033111572266, "logits/real": -1.8447434902191162, "logps/generated": -665.0208129882812, "logps/oppo_gen": -66.32365417480469, "logps/oppo_real": -339.19329833984375, "logps/real": -353.3502197265625, "loss": 0.3796, "loss/gen": 0.04067763313651085, "loss/real": 0.3821491003036499, "rewards/accuracies": 1.0, "rewards/generated": -598.6971435546875, "rewards/margins": 584.5401611328125, "rewards/real": -14.156988143920898, "step": 437 }, { "epoch": 0.92, "grad_norm": 38.24305173308447, "learning_rate": 1.0599850022898537e-08, "logits/generated": 0.2761492133140564, "logits/oppo_generated": -2.4901785850524902, "logits/oppo_real": -2.6094114780426025, "logits/real": -1.6084918975830078, "logps/generated": -875.5096435546875, "logps/oppo_gen": -53.723915100097656, "logps/oppo_real": -182.81817626953125, "logps/real": -147.58262634277344, "loss": 0.3627, "loss/gen": 0.16688652336597443, "loss/real": 0.15126757323741913, "rewards/accuracies": 1.0, "rewards/generated": -821.7857055664062, "rewards/margins": 857.021240234375, "rewards/real": 35.23555374145508, "step": 438 }, { "epoch": 0.92, "grad_norm": 23.49628695109679, "learning_rate": 1.0080028909897232e-08, "logits/generated": 0.6079811453819275, "logits/oppo_generated": -2.515484571456909, "logits/oppo_real": -2.485753297805786, "logits/real": -1.7542835474014282, "logps/generated": -616.97265625, "logps/oppo_gen": -80.76638793945312, "logps/oppo_real": -202.42733764648438, "logps/real": -179.23358154296875, "loss": 0.2542, "loss/gen": 0.20540261268615723, "loss/real": 0.18645432591438293, "rewards/accuracies": 1.0, "rewards/generated": -536.206298828125, "rewards/margins": 559.4000244140625, "rewards/real": 23.193754196166992, "step": 439 }, { "epoch": 0.92, "grad_norm": 23.16906564856131, "learning_rate": 9.57301420397924e-09, "logits/generated": 0.6640872359275818, "logits/oppo_generated": -2.4124038219451904, "logits/oppo_real": -2.5056729316711426, "logits/real": -1.7653595209121704, "logps/generated": -810.7935180664062, "logps/oppo_gen": -60.34915542602539, "logps/oppo_real": -184.50430297851562, "logps/real": -151.20101928710938, "loss": 0.2569, "loss/gen": 0.09386168420314789, "loss/real": 0.14957281947135925, "rewards/accuracies": 1.0, "rewards/generated": -750.4443969726562, "rewards/margins": 783.7476806640625, "rewards/real": 33.30329132080078, "step": 440 }, { "epoch": 0.92, "grad_norm": 17.140997994745213, "learning_rate": 9.078832968488632e-09, "logits/generated": 0.8487217426300049, "logits/oppo_generated": -2.730281352996826, "logits/oppo_real": -2.6713297367095947, "logits/real": -2.253817081451416, "logps/generated": -688.5202026367188, "logps/oppo_gen": -72.50233459472656, "logps/oppo_real": -303.8031921386719, "logps/real": -273.73455810546875, "loss": 0.2646, "loss/gen": 0.04261288791894913, "loss/real": 0.1547403186559677, "rewards/accuracies": 1.0, "rewards/generated": -616.017822265625, "rewards/margins": 646.08642578125, "rewards/real": 30.06861686706543, "step": 441 }, { "epoch": 0.92, "grad_norm": 41.81041690672389, "learning_rate": 8.597511581746625e-09, "logits/generated": 0.5856022834777832, "logits/oppo_generated": -2.798659324645996, "logits/oppo_real": -2.809553384780884, "logits/real": -1.9602468013763428, "logps/generated": -650.50244140625, "logps/oppo_gen": -71.67813110351562, "logps/oppo_real": -249.2584228515625, "logps/real": -257.9971008300781, "loss": 0.3944, "loss/gen": 0.13761942088603973, "loss/real": 0.3325386047363281, "rewards/accuracies": 1.0, "rewards/generated": -578.82421875, "rewards/margins": 570.0855712890625, "rewards/real": -8.738664627075195, "step": 442 }, { "epoch": 0.93, "grad_norm": 30.877363230343903, "learning_rate": 8.129075735643698e-09, "logits/generated": -0.38423842191696167, "logits/oppo_generated": -2.6113674640655518, "logits/oppo_real": -2.7933380603790283, "logits/real": -1.9059512615203857, "logps/generated": -570.1771240234375, "logps/oppo_gen": -70.2498779296875, "logps/oppo_real": -249.642822265625, "logps/real": -235.60647583007812, "loss": 0.3212, "loss/gen": 0.09656841307878494, "loss/real": 0.22677713632583618, "rewards/accuracies": 0.9375, "rewards/generated": -499.92724609375, "rewards/margins": 513.963623046875, "rewards/real": 14.036325454711914, "step": 443 }, { "epoch": 0.93, "grad_norm": 32.42138930359092, "learning_rate": 7.673550434268123e-09, "logits/generated": 0.3789711892604828, "logits/oppo_generated": -2.5773496627807617, "logits/oppo_real": -2.756646156311035, "logits/real": -1.5774751901626587, "logps/generated": -713.83984375, "logps/oppo_gen": -67.772705078125, "logps/oppo_real": -219.92550659179688, "logps/real": -217.2390594482422, "loss": 0.2387, "loss/gen": 0.13536104559898376, "loss/real": 0.26523107290267944, "rewards/accuracies": 0.9375, "rewards/generated": -646.0671997070312, "rewards/margins": 648.753662109375, "rewards/real": 2.686431884765625, "step": 444 }, { "epoch": 0.93, "grad_norm": 35.38876581761153, "learning_rate": 7.230959992571367e-09, "logits/generated": -0.14755092561244965, "logits/oppo_generated": -2.5365023612976074, "logits/oppo_real": -2.726724624633789, "logits/real": -1.5336180925369263, "logps/generated": -655.31982421875, "logps/oppo_gen": -64.62477111816406, "logps/oppo_real": -251.58450317382812, "logps/real": -238.06442260742188, "loss": 0.3278, "loss/gen": 0.14248934388160706, "loss/real": 0.23805347084999084, "rewards/accuracies": 0.9375, "rewards/generated": -590.695068359375, "rewards/margins": 604.215087890625, "rewards/real": 13.520094871520996, "step": 445 }, { "epoch": 0.93, "grad_norm": 20.637570890862875, "learning_rate": 6.801328035070136e-09, "logits/generated": -0.11328282952308655, "logits/oppo_generated": -2.7569596767425537, "logits/oppo_real": -2.9033079147338867, "logits/real": -1.7462917566299438, "logps/generated": -716.6192626953125, "logps/oppo_gen": -84.91046142578125, "logps/oppo_real": -289.9277038574219, "logps/real": -317.48663330078125, "loss": 0.3271, "loss/gen": 0.16633838415145874, "loss/real": 0.4990673363208771, "rewards/accuracies": 1.0, "rewards/generated": -631.7088623046875, "rewards/margins": 604.1499633789062, "rewards/real": -27.558929443359375, "step": 446 }, { "epoch": 0.94, "grad_norm": 78.71668442109335, "learning_rate": 6.38467749458535e-09, "logits/generated": 1.1556400060653687, "logits/oppo_generated": -2.6448044776916504, "logits/oppo_real": -2.7699906826019287, "logits/real": -1.9819279909133911, "logps/generated": -715.257568359375, "logps/oppo_gen": -44.87987518310547, "logps/oppo_real": -182.60540771484375, "logps/real": -185.8890380859375, "loss": 0.3929, "loss/gen": 0.1545814424753189, "loss/real": 0.30889979004859924, "rewards/accuracies": 0.9375, "rewards/generated": -670.377685546875, "rewards/margins": 667.093994140625, "rewards/real": -3.2836532592773438, "step": 447 }, { "epoch": 0.94, "grad_norm": 45.8729403914361, "learning_rate": 5.981030611018234e-09, "logits/generated": -0.1802346408367157, "logits/oppo_generated": -2.7766942977905273, "logits/oppo_real": -2.8434033393859863, "logits/real": -2.057481527328491, "logps/generated": -569.3570556640625, "logps/oppo_gen": -67.70870971679688, "logps/oppo_real": -264.5067138671875, "logps/real": -242.7213134765625, "loss": 0.3662, "loss/gen": 0.2699339985847473, "loss/real": 0.1798723340034485, "rewards/accuracies": 0.9375, "rewards/generated": -501.6483154296875, "rewards/margins": 523.4337768554688, "rewards/real": 21.785402297973633, "step": 448 }, { "epoch": 0.94, "grad_norm": 47.73978079046734, "learning_rate": 5.590408930162799e-09, "logits/generated": -0.4504716992378235, "logits/oppo_generated": -2.562833309173584, "logits/oppo_real": -2.5892672538757324, "logits/real": -1.9565715789794922, "logps/generated": -655.5435791015625, "logps/oppo_gen": -69.748779296875, "logps/oppo_real": -302.15716552734375, "logps/real": -293.0556640625, "loss": 0.3746, "loss/gen": 0.044189054518938065, "loss/real": 0.27661794424057007, "rewards/accuracies": 1.0, "rewards/generated": -585.7947998046875, "rewards/margins": 594.8963012695312, "rewards/real": 9.101491928100586, "step": 449 }, { "epoch": 0.94, "grad_norm": 36.17708183366923, "learning_rate": 5.212833302556258e-09, "logits/generated": 0.7933423519134521, "logits/oppo_generated": -2.8123812675476074, "logits/oppo_real": -2.7088489532470703, "logits/real": -2.274021625518799, "logps/generated": -740.898681640625, "logps/oppo_gen": -99.06866455078125, "logps/oppo_real": -265.1106262207031, "logps/real": -246.4234619140625, "loss": 0.2583, "loss/gen": 0.02867821604013443, "loss/real": 0.17321571707725525, "rewards/accuracies": 1.0, "rewards/generated": -641.8300170898438, "rewards/margins": 660.5171508789062, "rewards/real": 18.687145233154297, "step": 450 }, { "epoch": 0.94, "grad_norm": 32.52973962331911, "learning_rate": 4.848323882365668e-09, "logits/generated": -0.29569417238235474, "logits/oppo_generated": -2.724055290222168, "logits/oppo_real": -2.7838587760925293, "logits/real": -1.9440395832061768, "logps/generated": -734.2264404296875, "logps/oppo_gen": -72.73839569091797, "logps/oppo_real": -309.91082763671875, "logps/real": -293.07879638671875, "loss": 0.2935, "loss/gen": 0.03302386775612831, "loss/real": 0.19048979878425598, "rewards/accuracies": 1.0, "rewards/generated": -661.488037109375, "rewards/margins": 678.320068359375, "rewards/real": 16.832040786743164, "step": 451 }, { "epoch": 0.95, "grad_norm": 22.549430295256354, "learning_rate": 4.496900126312431e-09, "logits/generated": 0.7788766622543335, "logits/oppo_generated": -2.452781915664673, "logits/oppo_real": -2.67744779586792, "logits/real": -1.6308937072753906, "logps/generated": -589.6851806640625, "logps/oppo_gen": -52.5155029296875, "logps/oppo_real": -223.33140563964844, "logps/real": -205.42462158203125, "loss": 0.3002, "loss/gen": 0.08183200657367706, "loss/real": 0.2042657434940338, "rewards/accuracies": 1.0, "rewards/generated": -537.169677734375, "rewards/margins": 555.076416015625, "rewards/real": 17.906780242919922, "step": 452 }, { "epoch": 0.95, "grad_norm": 34.657315484635596, "learning_rate": 4.158580792633482e-09, "logits/generated": 1.6620922088623047, "logits/oppo_generated": -2.679365634918213, "logits/oppo_real": -2.71950101852417, "logits/real": -2.00820255279541, "logps/generated": -671.602294921875, "logps/oppo_gen": -63.20152282714844, "logps/oppo_real": -247.9625244140625, "logps/real": -214.57554626464844, "loss": 0.2825, "loss/gen": 0.035797297954559326, "loss/real": 0.15221944451332092, "rewards/accuracies": 1.0, "rewards/generated": -608.4007568359375, "rewards/margins": 641.7877197265625, "rewards/real": 33.38697814941406, "step": 453 }, { "epoch": 0.95, "grad_norm": 31.58702249713282, "learning_rate": 3.833383940080231e-09, "logits/generated": 0.5262564420700073, "logits/oppo_generated": -2.853964328765869, "logits/oppo_real": -2.8066015243530273, "logits/real": -2.2472078800201416, "logps/generated": -775.4197998046875, "logps/oppo_gen": -89.24186706542969, "logps/oppo_real": -313.77490234375, "logps/real": -293.4193420410156, "loss": 0.293, "loss/gen": 0.056602317839860916, "loss/real": 0.16948629915714264, "rewards/accuracies": 1.0, "rewards/generated": -686.177978515625, "rewards/margins": 706.533447265625, "rewards/real": 20.35555648803711, "step": 454 }, { "epoch": 0.95, "grad_norm": 24.27373169277787, "learning_rate": 3.521326926954532e-09, "logits/generated": -0.06063704192638397, "logits/oppo_generated": -2.945572853088379, "logits/oppo_real": -2.857299327850342, "logits/real": -2.3750839233398438, "logps/generated": -742.7252197265625, "logps/oppo_gen": -75.34283447265625, "logps/oppo_real": -377.9549560546875, "logps/real": -361.899658203125, "loss": 0.2313, "loss/gen": 0.11743300408124924, "loss/real": 0.24999423325061798, "rewards/accuracies": 1.0, "rewards/generated": -667.38232421875, "rewards/margins": 683.4376220703125, "rewards/real": 16.055313110351562, "step": 455 }, { "epoch": 0.95, "grad_norm": 19.96226374630275, "learning_rate": 3.2224264101821108e-09, "logits/generated": 0.8330753445625305, "logits/oppo_generated": -2.574817657470703, "logits/oppo_real": -2.46460223197937, "logits/real": -1.7454832792282104, "logps/generated": -636.5455322265625, "logps/oppo_gen": -70.7989501953125, "logps/oppo_real": -374.28759765625, "logps/real": -379.6514587402344, "loss": 0.3385, "loss/gen": 0.19702856242656708, "loss/real": 0.40760496258735657, "rewards/accuracies": 0.9375, "rewards/generated": -565.74658203125, "rewards/margins": 560.3827514648438, "rewards/real": -5.363855361938477, "step": 456 }, { "epoch": 0.96, "grad_norm": 27.184910302307994, "learning_rate": 2.936698344423505e-09, "logits/generated": -0.41058021783828735, "logits/oppo_generated": -2.9557366371154785, "logits/oppo_real": -2.8256478309631348, "logits/real": -2.5244743824005127, "logps/generated": -757.3851318359375, "logps/oppo_gen": -91.42108154296875, "logps/oppo_real": -355.4400634765625, "logps/real": -349.91094970703125, "loss": 0.2823, "loss/gen": 0.01450443733483553, "loss/real": 0.27503716945648193, "rewards/accuracies": 1.0, "rewards/generated": -665.9639892578125, "rewards/margins": 671.4931640625, "rewards/real": 5.529134750366211, "step": 457 }, { "epoch": 0.96, "grad_norm": 34.244138341359786, "learning_rate": 2.664157981222437e-09, "logits/generated": -0.20761600136756897, "logits/oppo_generated": -2.706226348876953, "logits/oppo_real": -2.7390899658203125, "logits/real": -2.1303887367248535, "logps/generated": -750.4144287109375, "logps/oppo_gen": -83.41449737548828, "logps/oppo_real": -231.79632568359375, "logps/real": -206.0238037109375, "loss": 0.2741, "loss/gen": 0.05449531227350235, "loss/real": 0.16203711926937103, "rewards/accuracies": 1.0, "rewards/generated": -666.9998779296875, "rewards/margins": 692.7724609375, "rewards/real": 25.772523880004883, "step": 458 }, { "epoch": 0.96, "grad_norm": 48.74286055493794, "learning_rate": 2.4048198681917154e-09, "logits/generated": 0.6840654015541077, "logits/oppo_generated": -2.736264705657959, "logits/oppo_real": -2.906867742538452, "logits/real": -1.9091205596923828, "logps/generated": -722.01708984375, "logps/oppo_gen": -76.57756042480469, "logps/oppo_real": -386.65283203125, "logps/real": -359.4886474609375, "loss": 0.3591, "loss/gen": 0.03984036296606064, "loss/real": 0.1646542102098465, "rewards/accuracies": 1.0, "rewards/generated": -645.439453125, "rewards/margins": 672.6036376953125, "rewards/real": 27.164186477661133, "step": 459 }, { "epoch": 0.96, "grad_norm": 33.56559139626844, "learning_rate": 2.158697848236607e-09, "logits/generated": -0.3475767970085144, "logits/oppo_generated": -2.688716411590576, "logits/oppo_real": -2.695068359375, "logits/real": -1.9637393951416016, "logps/generated": -757.3863525390625, "logps/oppo_gen": -84.60350036621094, "logps/oppo_real": -197.73443603515625, "logps/real": -187.57839965820312, "loss": 0.2813, "loss/gen": 0.021964294835925102, "loss/real": 0.23046338558197021, "rewards/accuracies": 1.0, "rewards/generated": -672.782958984375, "rewards/margins": 682.93896484375, "rewards/real": 10.156058311462402, "step": 460 }, { "epoch": 0.96, "grad_norm": 20.376350217675377, "learning_rate": 1.9258050588161766e-09, "logits/generated": 0.5216965675354004, "logits/oppo_generated": -2.673025369644165, "logits/oppo_real": -2.726318836212158, "logits/real": -2.057188034057617, "logps/generated": -774.2880249023438, "logps/oppo_gen": -65.60742950439453, "logps/oppo_real": -229.56605529785156, "logps/real": -203.36492919921875, "loss": 0.2584, "loss/gen": 0.03890099376440048, "loss/real": 0.16776156425476074, "rewards/accuracies": 1.0, "rewards/generated": -708.6805419921875, "rewards/margins": 734.8817138671875, "rewards/real": 26.201156616210938, "step": 461 }, { "epoch": 0.97, "grad_norm": 26.108272764361132, "learning_rate": 1.7061539312417107e-09, "logits/generated": 1.1993634700775146, "logits/oppo_generated": -2.521968126296997, "logits/oppo_real": -2.3472464084625244, "logits/real": -1.8873664140701294, "logps/generated": -759.1806640625, "logps/oppo_gen": -79.24931335449219, "logps/oppo_real": -204.78036499023438, "logps/real": -183.12274169921875, "loss": 0.264, "loss/gen": 0.04187949746847153, "loss/real": 0.1729775071144104, "rewards/accuracies": 1.0, "rewards/generated": -679.9313354492188, "rewards/margins": 701.5889892578125, "rewards/real": 21.657615661621094, "step": 462 }, { "epoch": 0.97, "grad_norm": 31.69596530015443, "learning_rate": 1.4997561900135236e-09, "logits/generated": 0.6135013699531555, "logits/oppo_generated": -2.7849419116973877, "logits/oppo_real": -2.7172937393188477, "logits/real": -2.012725591659546, "logps/generated": -804.5126953125, "logps/oppo_gen": -67.21133422851562, "logps/oppo_real": -265.61785888671875, "logps/real": -281.4776611328125, "loss": 0.3281, "loss/gen": 0.07727505266666412, "loss/real": 0.3425220847129822, "rewards/accuracies": 1.0, "rewards/generated": -737.3013916015625, "rewards/margins": 721.441650390625, "rewards/real": -15.859792709350586, "step": 463 }, { "epoch": 0.97, "grad_norm": 46.92561590149217, "learning_rate": 1.3066228521948219e-09, "logits/generated": -0.23002564907073975, "logits/oppo_generated": -2.977627754211426, "logits/oppo_real": -2.8772168159484863, "logits/real": -2.613190174102783, "logps/generated": -731.850341796875, "logps/oppo_gen": -94.8328857421875, "logps/oppo_real": -499.3686828613281, "logps/real": -466.97076416015625, "loss": 0.2805, "loss/gen": 0.03973410278558731, "loss/real": 0.1506689488887787, "rewards/accuracies": 1.0, "rewards/generated": -637.017333984375, "rewards/margins": 669.4153442382812, "rewards/real": 32.39790725708008, "step": 464 }, { "epoch": 0.97, "grad_norm": 30.14508881052652, "learning_rate": 1.126764226823812e-09, "logits/generated": 0.786093533039093, "logits/oppo_generated": -2.6228816509246826, "logits/oppo_real": -2.6832354068756104, "logits/real": -1.7279415130615234, "logps/generated": -825.026611328125, "logps/oppo_gen": -65.4859619140625, "logps/oppo_real": -178.48684692382812, "logps/real": -175.68511962890625, "loss": 0.3055, "loss/gen": 0.07098890841007233, "loss/real": 0.29667070508003235, "rewards/accuracies": 1.0, "rewards/generated": -759.540771484375, "rewards/margins": 762.3424072265625, "rewards/real": 2.8017330169677734, "step": 465 }, { "epoch": 0.97, "grad_norm": 27.821471955268876, "learning_rate": 9.60189914363363e-10, "logits/generated": 0.8292776346206665, "logits/oppo_generated": -2.7453532218933105, "logits/oppo_real": -2.565095901489258, "logits/real": -2.1934657096862793, "logps/generated": -717.3662109375, "logps/oppo_gen": -77.26994323730469, "logps/oppo_real": -337.29949951171875, "logps/real": -320.40087890625, "loss": 0.2969, "loss/gen": 0.027588829398155212, "loss/real": 0.21265217661857605, "rewards/accuracies": 1.0, "rewards/generated": -640.0963134765625, "rewards/margins": 656.9949951171875, "rewards/real": 16.89865493774414, "step": 466 }, { "epoch": 0.98, "grad_norm": 23.094122650071867, "learning_rate": 8.069088061885276e-10, "logits/generated": 0.2578420042991638, "logits/oppo_generated": -2.66814923286438, "logits/oppo_real": -2.721468448638916, "logits/real": -1.9464046955108643, "logps/generated": -617.5042724609375, "logps/oppo_gen": -68.08262634277344, "logps/oppo_real": -245.5079345703125, "logps/real": -245.2777099609375, "loss": 0.2767, "loss/gen": 0.13515013456344604, "loss/real": 0.2663593292236328, "rewards/accuracies": 1.0, "rewards/generated": -549.421630859375, "rewards/margins": 549.65185546875, "rewards/real": 0.23020458221435547, "step": 467 }, { "epoch": 0.98, "grad_norm": 32.73517675032472, "learning_rate": 6.66929084112089e-10, "logits/generated": -0.0217684805393219, "logits/oppo_generated": -2.8137381076812744, "logits/oppo_real": -2.701899528503418, "logits/real": -2.3723936080932617, "logps/generated": -732.671875, "logps/oppo_gen": -95.51661682128906, "logps/oppo_real": -365.8069152832031, "logps/real": -337.0956115722656, "loss": 0.3006, "loss/gen": 0.017105087637901306, "loss/real": 0.17143797874450684, "rewards/accuracies": 1.0, "rewards/generated": -637.1552124023438, "rewards/margins": 665.8665161132812, "rewards/real": 28.711307525634766, "step": 468 }, { "epoch": 0.98, "grad_norm": 35.94339965161372, "learning_rate": 5.402582199476036e-10, "logits/generated": 0.8715763092041016, "logits/oppo_generated": -2.877156972885132, "logits/oppo_real": -2.714019298553467, "logits/real": -2.1983542442321777, "logps/generated": -741.3756103515625, "logps/oppo_gen": -62.887359619140625, "logps/oppo_real": -226.98411560058594, "logps/real": -201.53179931640625, "loss": 0.2849, "loss/gen": 0.026809904724359512, "loss/real": 0.16713371872901917, "rewards/accuracies": 1.0, "rewards/generated": -678.4882202148438, "rewards/margins": 703.9405517578125, "rewards/real": 25.452287673950195, "step": 469 }, { "epoch": 0.98, "grad_norm": 48.31405641089351, "learning_rate": 4.269029751107489e-10, "logits/generated": 0.46310731768608093, "logits/oppo_generated": -2.614527702331543, "logits/oppo_real": -2.721973419189453, "logits/real": -1.800484299659729, "logps/generated": -641.4134521484375, "logps/oppo_gen": -52.66883087158203, "logps/oppo_real": -169.48345947265625, "logps/real": -150.42047119140625, "loss": 0.3255, "loss/gen": 0.054055292159318924, "loss/real": 0.17102661728858948, "rewards/accuracies": 1.0, "rewards/generated": -588.7446899414062, "rewards/margins": 607.8077392578125, "rewards/real": 19.062992095947266, "step": 470 }, { "epoch": 0.99, "grad_norm": 27.930841322320678, "learning_rate": 3.2686940025836164e-10, "logits/generated": 1.4157593250274658, "logits/oppo_generated": -2.5730538368225098, "logits/oppo_real": -2.5419564247131348, "logits/real": -1.729305386543274, "logps/generated": -710.6050415039062, "logps/oppo_gen": -77.87423706054688, "logps/oppo_real": -270.4675598144531, "logps/real": -268.3628234863281, "loss": 0.3195, "loss/gen": 0.027144107967615128, "loss/real": 0.2538290023803711, "rewards/accuracies": 1.0, "rewards/generated": -632.7308349609375, "rewards/margins": 634.8355102539062, "rewards/real": 2.104731559753418, "step": 471 }, { "epoch": 0.99, "grad_norm": 39.48121833938892, "learning_rate": 2.4016283496544607e-10, "logits/generated": -0.10758259892463684, "logits/oppo_generated": -2.6745200157165527, "logits/oppo_real": -2.645476818084717, "logits/real": -1.9466335773468018, "logps/generated": -801.3179321289062, "logps/oppo_gen": -101.33052825927734, "logps/oppo_real": -358.234375, "logps/real": -353.99456787109375, "loss": 0.3131, "loss/gen": 0.04514200612902641, "loss/real": 0.2479880452156067, "rewards/accuracies": 1.0, "rewards/generated": -699.9874267578125, "rewards/margins": 704.227294921875, "rewards/real": 4.239832878112793, "step": 472 }, { "epoch": 0.99, "grad_norm": 24.568082917521007, "learning_rate": 1.6678790744015236e-10, "logits/generated": 0.45618465542793274, "logits/oppo_generated": -2.753333330154419, "logits/oppo_real": -2.8271706104278564, "logits/real": -2.0780029296875, "logps/generated": -710.4241943359375, "logps/oppo_gen": -72.44290924072266, "logps/oppo_real": -266.6786193847656, "logps/real": -258.2034912109375, "loss": 0.2472, "loss/gen": 0.012053638696670532, "loss/real": 0.21844035387039185, "rewards/accuracies": 1.0, "rewards/generated": -637.9812622070312, "rewards/margins": 646.4563598632812, "rewards/real": 8.475127220153809, "step": 473 }, { "epoch": 0.99, "grad_norm": 30.222697397622888, "learning_rate": 1.0674853427683484e-10, "logits/generated": -0.06531578302383423, "logits/oppo_generated": -2.4703428745269775, "logits/oppo_real": -2.778745174407959, "logits/real": -1.5166780948638916, "logps/generated": -741.3477783203125, "logps/oppo_gen": -59.79944610595703, "logps/oppo_real": -181.95498657226562, "logps/real": -220.44729614257812, "loss": 0.3104, "loss/gen": 0.060335446149110794, "loss/real": 0.39393651485443115, "rewards/accuracies": 1.0, "rewards/generated": -681.5482788085938, "rewards/margins": 643.055908203125, "rewards/real": -38.49231719970703, "step": 474 }, { "epoch": 0.99, "grad_norm": 30.308133343664434, "learning_rate": 6.004792024680294e-11, "logits/generated": 0.3023368716239929, "logits/oppo_generated": -2.767500877380371, "logits/oppo_real": -2.7556655406951904, "logits/real": -2.2175793647766113, "logps/generated": -747.4019775390625, "logps/oppo_gen": -77.92488098144531, "logps/oppo_real": -223.17697143554688, "logps/real": -190.78720092773438, "loss": 0.2788, "loss/gen": 0.049265552312135696, "loss/real": 0.15117153525352478, "rewards/accuracies": 1.0, "rewards/generated": -669.4771118164062, "rewards/margins": 701.8668823242188, "rewards/real": 32.3897590637207, "step": 475 }, { "epoch": 1.0, "grad_norm": 24.058693641719074, "learning_rate": 2.6688558127485604e-11, "logits/generated": -0.8989108800888062, "logits/oppo_generated": -2.83430814743042, "logits/oppo_real": -3.1122868061065674, "logits/real": -2.3030447959899902, "logps/generated": -717.5481567382812, "logps/oppo_gen": -86.60485076904297, "logps/oppo_real": -374.5808410644531, "logps/real": -337.60858154296875, "loss": 0.2793, "loss/gen": 0.09490139782428741, "loss/real": 0.14539852738380432, "rewards/accuracies": 1.0, "rewards/generated": -630.943359375, "rewards/margins": 667.91552734375, "rewards/real": 36.972225189208984, "step": 476 }, { "epoch": 1.0, "grad_norm": 26.022392606987303, "learning_rate": 6.672228569148952e-12, "logits/generated": 0.45034003257751465, "logits/oppo_generated": -2.839545488357544, "logits/oppo_real": -2.7138943672180176, "logits/real": -2.2914600372314453, "logps/generated": -687.2457885742188, "logps/oppo_gen": -68.36036682128906, "logps/oppo_real": -309.4075622558594, "logps/real": -309.28704833984375, "loss": 0.2662, "loss/gen": 0.031270068138837814, "loss/real": 0.27253997325897217, "rewards/accuracies": 1.0, "rewards/generated": -618.8853759765625, "rewards/margins": 619.0059204101562, "rewards/real": 0.1204981803894043, "step": 477 }, { "epoch": 1.0, "grad_norm": 25.07323136638642, "learning_rate": 0.0, "logits/generated": -0.01666666567325592, "logits/oppo_generated": -2.699960231781006, "logits/oppo_real": -2.9887475967407227, "logits/real": -2.0640759468078613, "logps/generated": -709.115234375, "logps/oppo_gen": -71.9469223022461, "logps/oppo_real": -258.2105712890625, "logps/real": -262.96441650390625, "loss": 0.2512, "loss/gen": 0.017528638243675232, "loss/real": 0.30680006742477417, "rewards/accuracies": 1.0, "rewards/generated": -637.1682739257812, "rewards/margins": 632.4144287109375, "rewards/real": -4.753854274749756, "step": 478 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.5313476455012126, "train_runtime": 13675.2672, "train_samples_per_second": 4.47, "train_steps_per_second": 0.035 } ], "logging_steps": 1.0, "max_steps": 478, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 96, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }