diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -5035,3 +5035,1152 @@ Time to load utils op: 0.0003948211669921875 seconds [2022-12-17 10:45:29,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-17 10:45:29,453] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-17 10:45:29,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +[2022-12-17 10:47:43,406] [INFO] [timer.py:197:stop] 0/8002, RunningAvgSamplesPerSec=6.338349624432332, CurrSamplesPerSec=5.420656353971379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:47:54,680] [INFO] [timer.py:197:stop] 0/8004, RunningAvgSamplesPerSec=6.33835143065796, CurrSamplesPerSec=5.704160291062447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:48:06,006] [INFO] [timer.py:197:stop] 0/8006, RunningAvgSamplesPerSec=6.338344500819827, CurrSamplesPerSec=5.702807649947566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:48:17,251] [INFO] [timer.py:197:stop] 0/8008, RunningAvgSamplesPerSec=6.338348296361045, CurrSamplesPerSec=5.70014399244045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:48:28,495] [INFO] [timer.py:197:stop] 0/8010, RunningAvgSamplesPerSec=6.3383562678162235, CurrSamplesPerSec=5.714695531689848, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:48:39,732] [INFO] [timer.py:197:stop] 0/8012, RunningAvgSamplesPerSec=6.33836165307344, CurrSamplesPerSec=5.715190485728729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:48:50,957] [INFO] [timer.py:197:stop] 0/8014, RunningAvgSamplesPerSec=6.338372401860596, CurrSamplesPerSec=5.731869846856297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:02,219] [INFO] [timer.py:197:stop] 0/8016, RunningAvgSamplesPerSec=6.338376282487288, CurrSamplesPerSec=5.72263388192585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:13,499] [INFO] [timer.py:197:stop] 0/8018, RunningAvgSamplesPerSec=6.338378207077379, CurrSamplesPerSec=5.699282312244174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:24,759] [INFO] [logging.py:68:log_dist] [Rank 0] step=4010, skipped=5, lr=[2.2133333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:49:24,761] [INFO] [timer.py:197:stop] 0/8020, RunningAvgSamplesPerSec=6.338380369110319, CurrSamplesPerSec=5.700426273557393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:36,071] [INFO] [timer.py:197:stop] 0/8022, RunningAvgSamplesPerSec=6.338378251809593, CurrSamplesPerSec=5.692219348309159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:44,558] [INFO] [timer.py:197:stop] 0/8024, RunningAvgSamplesPerSec=6.338766498759093, CurrSamplesPerSec=10.175358309906693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:49:55,831] [INFO] [timer.py:197:stop] 0/8026, RunningAvgSamplesPerSec=6.338767264312376, CurrSamplesPerSec=5.688479003299899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:50:07,070] [INFO] [timer.py:197:stop] 0/8028, RunningAvgSamplesPerSec=6.338773488763729, CurrSamplesPerSec=5.724551109671964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:50:18,301] [INFO] [timer.py:197:stop] 0/8030, RunningAvgSamplesPerSec=6.338779926930835, CurrSamplesPerSec=5.722825424903103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:50:29,539] [INFO] [timer.py:197:stop] 0/8032, RunningAvgSamplesPerSec=6.33878562512091, CurrSamplesPerSec=5.724370925896205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:50:40,772] [INFO] [timer.py:197:stop] 0/8034, RunningAvgSamplesPerSec=6.338794929840726, CurrSamplesPerSec=5.743888915707282, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:50:52,047] [INFO] [timer.py:197:stop] 0/8036, RunningAvgSamplesPerSec=6.338797520759499, CurrSamplesPerSec=5.707314258368477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:03,314] [INFO] [timer.py:197:stop] 0/8038, RunningAvgSamplesPerSec=6.338802433467725, CurrSamplesPerSec=5.713592292317813, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:14,578] [INFO] [logging.py:68:log_dist] [Rank 0] step=4020, skipped=5, lr=[2.1911111111111115e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:51:14,580] [INFO] [timer.py:197:stop] 0/8040, RunningAvgSamplesPerSec=6.338805872784326, CurrSamplesPerSec=5.714824493755569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:25,902] [INFO] [timer.py:197:stop] 0/8042, RunningAvgSamplesPerSec=6.338803333984638, CurrSamplesPerSec=5.678797831205772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:37,186] [INFO] [timer.py:197:stop] 0/8044, RunningAvgSamplesPerSec=6.338804543158869, CurrSamplesPerSec=5.717682131195379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:48,453] [INFO] [timer.py:197:stop] 0/8046, RunningAvgSamplesPerSec=6.338806879460148, CurrSamplesPerSec=5.693272568998906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:51:59,734] [INFO] [timer.py:197:stop] 0/8048, RunningAvgSamplesPerSec=6.338810120073018, CurrSamplesPerSec=5.700095576532893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:52:11,006] [INFO] [timer.py:197:stop] 0/8050, RunningAvgSamplesPerSec=6.338812691972016, CurrSamplesPerSec=5.702963458180577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.1800000000000003e-06, 'epoch': 17.06} +[2022-12-17 10:52:22,315] [INFO] [timer.py:197:stop] 0/8052, RunningAvgSamplesPerSec=6.338810008624044, CurrSamplesPerSec=5.671229755707918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:52:33,585] [INFO] [timer.py:197:stop] 0/8054, RunningAvgSamplesPerSec=6.338810694836544, CurrSamplesPerSec=5.713058462833503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:52:44,839] [INFO] [timer.py:197:stop] 0/8056, RunningAvgSamplesPerSec=6.338814151111343, CurrSamplesPerSec=5.7207464664777845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:52:56,121] [INFO] [timer.py:197:stop] 0/8058, RunningAvgSamplesPerSec=6.3388157502265425, CurrSamplesPerSec=5.721475381275096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:53:07,427] [INFO] [logging.py:68:log_dist] [Rank 0] step=4030, skipped=5, lr=[2.168888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:53:07,429] [INFO] [timer.py:197:stop] 0/8060, RunningAvgSamplesPerSec=6.338812634700473, CurrSamplesPerSec=5.691154933906094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:53:18,719] [INFO] [timer.py:197:stop] 0/8062, RunningAvgSamplesPerSec=6.338812535242969, CurrSamplesPerSec=5.710401257994357, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:53:30,002] [INFO] [timer.py:197:stop] 0/8064, RunningAvgSamplesPerSec=6.338814281507109, CurrSamplesPerSec=5.702056109912625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:53:41,286] [INFO] [timer.py:197:stop] 0/8066, RunningAvgSamplesPerSec=6.33881451595564, CurrSamplesPerSec=5.683698075932731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:53:52,607] [INFO] [timer.py:197:stop] 0/8068, RunningAvgSamplesPerSec=6.338811931154509, CurrSamplesPerSec=5.694774601571177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:54:03,876] [INFO] [timer.py:197:stop] 0/8070, RunningAvgSamplesPerSec=6.3388150547450985, CurrSamplesPerSec=5.7143904257518585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:54:15,345] [INFO] [timer.py:197:stop] 0/8072, RunningAvgSamplesPerSec=6.338817286845124, CurrSamplesPerSec=5.707756232922582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:54:26,606] [INFO] [timer.py:197:stop] 0/8074, RunningAvgSamplesPerSec=6.338822134613462, CurrSamplesPerSec=5.726801676459754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:54:37,842] [INFO] [timer.py:197:stop] 0/8076, RunningAvgSamplesPerSec=6.338828168085962, CurrSamplesPerSec=5.734015445790293, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:54:49,098] [INFO] [timer.py:197:stop] 0/8078, RunningAvgSamplesPerSec=6.338831253328031, CurrSamplesPerSec=5.718133508620822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:00,394] [INFO] [logging.py:68:log_dist] [Rank 0] step=4040, skipped=5, lr=[2.1466666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:55:00,395] [INFO] [timer.py:197:stop] 0/8080, RunningAvgSamplesPerSec=6.338832635103738, CurrSamplesPerSec=5.707157241604528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:11,720] [INFO] [timer.py:197:stop] 0/8082, RunningAvgSamplesPerSec=6.338828706895809, CurrSamplesPerSec=5.695046685152615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:22,963] [INFO] [timer.py:197:stop] 0/8084, RunningAvgSamplesPerSec=6.338832031374013, CurrSamplesPerSec=5.717097614428014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:34,239] [INFO] [timer.py:197:stop] 0/8086, RunningAvgSamplesPerSec=6.338835473233115, CurrSamplesPerSec=5.721566844054441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:45,473] [INFO] [timer.py:197:stop] 0/8088, RunningAvgSamplesPerSec=6.338840393624562, CurrSamplesPerSec=5.726270750616645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:55:56,756] [INFO] [timer.py:197:stop] 0/8090, RunningAvgSamplesPerSec=6.338841210521914, CurrSamplesPerSec=5.708622662798983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:56:08,018] [INFO] [timer.py:197:stop] 0/8092, RunningAvgSamplesPerSec=6.338846009986815, CurrSamplesPerSec=5.719069862889136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:56:19,320] [INFO] [timer.py:197:stop] 0/8094, RunningAvgSamplesPerSec=6.3388423110670855, CurrSamplesPerSec=5.697636888234431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:56:30,592] [INFO] [timer.py:197:stop] 0/8096, RunningAvgSamplesPerSec=6.3388448983325505, CurrSamplesPerSec=5.708167202030818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:56:41,883] [INFO] [timer.py:197:stop] 0/8098, RunningAvgSamplesPerSec=6.338842357992624, CurrSamplesPerSec=5.695432141121127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:56:53,150] [INFO] [logging.py:68:log_dist] [Rank 0] step=4050, skipped=5, lr=[2.1244444444444443e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:56:53,152] [INFO] [timer.py:197:stop] 0/8100, RunningAvgSamplesPerSec=6.338845647073789, CurrSamplesPerSec=5.717637557582573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.1244444444444443e-06, 'epoch': 17.16} +[2022-12-17 10:57:04,428] [INFO] [timer.py:197:stop] 0/8102, RunningAvgSamplesPerSec=6.338848160233028, CurrSamplesPerSec=5.732973789802865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:57:15,740] [INFO] [timer.py:197:stop] 0/8104, RunningAvgSamplesPerSec=6.338846048444852, CurrSamplesPerSec=5.697548365669088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:57:27,008] [INFO] [timer.py:197:stop] 0/8106, RunningAvgSamplesPerSec=6.3388497642169925, CurrSamplesPerSec=5.718600794644177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:57:38,278] [INFO] [timer.py:197:stop] 0/8108, RunningAvgSamplesPerSec=6.338852471241099, CurrSamplesPerSec=5.73913419021571, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:57:49,561] [INFO] [timer.py:197:stop] 0/8110, RunningAvgSamplesPerSec=6.338853272107325, CurrSamplesPerSec=5.7126093450966895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:00,798] [INFO] [timer.py:197:stop] 0/8112, RunningAvgSamplesPerSec=6.338861996910889, CurrSamplesPerSec=5.7258691399609205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:12,086] [INFO] [timer.py:197:stop] 0/8114, RunningAvgSamplesPerSec=6.3388632967592065, CurrSamplesPerSec=5.715007970574066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:23,350] [INFO] [timer.py:197:stop] 0/8116, RunningAvgSamplesPerSec=6.338868725063488, CurrSamplesPerSec=5.731447870570084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:34,647] [INFO] [timer.py:197:stop] 0/8118, RunningAvgSamplesPerSec=6.338865962296623, CurrSamplesPerSec=5.6913957801372606, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:45,904] [INFO] [logging.py:68:log_dist] [Rank 0] step=4060, skipped=5, lr=[2.1022222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:58:45,906] [INFO] [timer.py:197:stop] 0/8120, RunningAvgSamplesPerSec=6.338870234253123, CurrSamplesPerSec=5.701580382323306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:58:57,197] [INFO] [timer.py:197:stop] 0/8122, RunningAvgSamplesPerSec=6.338871197907672, CurrSamplesPerSec=5.69734617690224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:59:08,469] [INFO] [timer.py:197:stop] 0/8124, RunningAvgSamplesPerSec=6.338874408163104, CurrSamplesPerSec=5.712023677168763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:59:19,768] [INFO] [timer.py:197:stop] 0/8126, RunningAvgSamplesPerSec=6.338873435771249, CurrSamplesPerSec=5.6937959439141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:59:31,053] [INFO] [timer.py:197:stop] 0/8128, RunningAvgSamplesPerSec=6.338875103479378, CurrSamplesPerSec=5.706707110055808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:59:42,333] [INFO] [timer.py:197:stop] 0/8130, RunningAvgSamplesPerSec=6.338876937697654, CurrSamplesPerSec=5.723553161097039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:59:53,604] [INFO] [timer.py:197:stop] 0/8132, RunningAvgSamplesPerSec=6.33888008476501, CurrSamplesPerSec=5.7310925187864905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:00:04,854] [INFO] [timer.py:197:stop] 0/8134, RunningAvgSamplesPerSec=6.338886804346215, CurrSamplesPerSec=5.73368304465005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:00:16,108] [INFO] [timer.py:197:stop] 0/8136, RunningAvgSamplesPerSec=6.338893059648211, CurrSamplesPerSec=5.728716563514304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:00:27,360] [INFO] [timer.py:197:stop] 0/8138, RunningAvgSamplesPerSec=6.338897099643674, CurrSamplesPerSec=5.70207137132762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:00:38,615] [INFO] [logging.py:68:log_dist] [Rank 0] step=4070, skipped=5, lr=[2.08e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:00:38,617] [INFO] [timer.py:197:stop] 0/8140, RunningAvgSamplesPerSec=6.338901736596038, CurrSamplesPerSec=5.715131836254571, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:00:49,916] [INFO] [timer.py:197:stop] 0/8142, RunningAvgSamplesPerSec=6.338901148214492, CurrSamplesPerSec=5.674539084926821, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:01:01,172] [INFO] [timer.py:197:stop] 0/8144, RunningAvgSamplesPerSec=6.338908038076983, CurrSamplesPerSec=5.732348194412151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:01:12,415] [INFO] [timer.py:197:stop] 0/8146, RunningAvgSamplesPerSec=6.338913779001088, CurrSamplesPerSec=5.734613962983128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:01:23,683] [INFO] [timer.py:197:stop] 0/8148, RunningAvgSamplesPerSec=6.338917733451932, CurrSamplesPerSec=5.725038492812677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:01:34,947] [INFO] [timer.py:197:stop] 0/8150, RunningAvgSamplesPerSec=6.338923103422685, CurrSamplesPerSec=5.71891390438847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.0688888888888892e-06, 'epoch': 17.27} +[2022-12-17 11:01:46,198] [INFO] [timer.py:197:stop] 0/8152, RunningAvgSamplesPerSec=6.338929776354999, CurrSamplesPerSec=5.732722800691356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:01:57,467] [INFO] [timer.py:197:stop] 0/8154, RunningAvgSamplesPerSec=6.338933563760563, CurrSamplesPerSec=5.711915503414802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:02:08,758] [INFO] [timer.py:197:stop] 0/8156, RunningAvgSamplesPerSec=6.338935132196591, CurrSamplesPerSec=5.678368257595655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:02:20,051] [INFO] [timer.py:197:stop] 0/8158, RunningAvgSamplesPerSec=6.338935200431204, CurrSamplesPerSec=5.697341581869769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:02:31,296] [INFO] [logging.py:68:log_dist] [Rank 0] step=4080, skipped=5, lr=[2.057777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:02:31,298] [INFO] [timer.py:197:stop] 0/8160, RunningAvgSamplesPerSec=6.338942180948522, CurrSamplesPerSec=5.748176362106808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:02:42,577] [INFO] [timer.py:197:stop] 0/8162, RunningAvgSamplesPerSec=6.338944245421692, CurrSamplesPerSec=5.714502829412263, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:02:53,889] [INFO] [timer.py:197:stop] 0/8164, RunningAvgSamplesPerSec=6.338941353879358, CurrSamplesPerSec=5.699720623180004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:03:05,171] [INFO] [timer.py:197:stop] 0/8166, RunningAvgSamplesPerSec=6.338942619512358, CurrSamplesPerSec=5.715760494764467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:03:16,451] [INFO] [timer.py:197:stop] 0/8168, RunningAvgSamplesPerSec=6.33894495321946, CurrSamplesPerSec=5.719275546167675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:03:27,745] [INFO] [timer.py:197:stop] 0/8170, RunningAvgSamplesPerSec=6.3389440244462625, CurrSamplesPerSec=5.709750945228217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:03:39,039] [INFO] [timer.py:197:stop] 0/8172, RunningAvgSamplesPerSec=6.338941089178579, CurrSamplesPerSec=5.703327690107661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:03:50,320] [INFO] [timer.py:197:stop] 0/8174, RunningAvgSamplesPerSec=6.33894121579318, CurrSamplesPerSec=5.728734657651518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:01,599] [INFO] [timer.py:197:stop] 0/8176, RunningAvgSamplesPerSec=6.338943687836185, CurrSamplesPerSec=5.71622909715484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:12,834] [INFO] [timer.py:197:stop] 0/8178, RunningAvgSamplesPerSec=6.3389499263487465, CurrSamplesPerSec=5.733750648539528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:24,098] [INFO] [logging.py:68:log_dist] [Rank 0] step=4090, skipped=5, lr=[2.0355555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:04:24,100] [INFO] [timer.py:197:stop] 0/8180, RunningAvgSamplesPerSec=6.338953945781538, CurrSamplesPerSec=5.720305891098483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:35,394] [INFO] [timer.py:197:stop] 0/8182, RunningAvgSamplesPerSec=6.338951432192114, CurrSamplesPerSec=5.683273536283522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:46,666] [INFO] [timer.py:197:stop] 0/8184, RunningAvgSamplesPerSec=6.338951766298605, CurrSamplesPerSec=5.697556105237196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:04:57,934] [INFO] [timer.py:197:stop] 0/8186, RunningAvgSamplesPerSec=6.338953882850323, CurrSamplesPerSec=5.713337160451459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:05:09,215] [INFO] [timer.py:197:stop] 0/8188, RunningAvgSamplesPerSec=6.33895551009071, CurrSamplesPerSec=5.703192703189134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:05:20,485] [INFO] [timer.py:197:stop] 0/8190, RunningAvgSamplesPerSec=6.338958504561117, CurrSamplesPerSec=5.73846210545695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:05:31,725] [INFO] [timer.py:197:stop] 0/8192, RunningAvgSamplesPerSec=6.338965343487257, CurrSamplesPerSec=5.738406902861988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:05:42,966] [INFO] [timer.py:197:stop] 0/8194, RunningAvgSamplesPerSec=6.33897323043952, CurrSamplesPerSec=5.73368133007938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:05:54,353] [INFO] [timer.py:197:stop] 0/8196, RunningAvgSamplesPerSec=6.338978358488804, CurrSamplesPerSec=5.716910594302126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:06:05,609] [INFO] [timer.py:197:stop] 0/8198, RunningAvgSamplesPerSec=6.338984368170587, CurrSamplesPerSec=5.726013263824789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:06:16,838] [INFO] [logging.py:68:log_dist] [Rank 0] step=4100, skipped=5, lr=[2.0133333333333337e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:06:16,839] [INFO] [timer.py:197:stop] 0/8200, RunningAvgSamplesPerSec=6.338994041470853, CurrSamplesPerSec=5.74776994998559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.0133333333333337e-06, 'epoch': 17.37} +[2022-12-17 11:06:28,044] [INFO] [timer.py:197:stop] 0/8202, RunningAvgSamplesPerSec=6.339006186191332, CurrSamplesPerSec=5.744074263552449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:06:39,300] [INFO] [timer.py:197:stop] 0/8204, RunningAvgSamplesPerSec=6.339013783921928, CurrSamplesPerSec=5.754720584334838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:06:50,550] [INFO] [timer.py:197:stop] 0/8206, RunningAvgSamplesPerSec=6.339022799293814, CurrSamplesPerSec=5.765227145289706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:01,784] [INFO] [timer.py:197:stop] 0/8208, RunningAvgSamplesPerSec=6.339034291148339, CurrSamplesPerSec=5.757240403557556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:13,076] [INFO] [timer.py:197:stop] 0/8210, RunningAvgSamplesPerSec=6.339035110256033, CurrSamplesPerSec=5.716284604241528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:24,329] [INFO] [timer.py:197:stop] 0/8212, RunningAvgSamplesPerSec=6.339041049331456, CurrSamplesPerSec=5.71850577200943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:35,570] [INFO] [timer.py:197:stop] 0/8214, RunningAvgSamplesPerSec=6.339050077138531, CurrSamplesPerSec=5.737041896588219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:46,813] [INFO] [timer.py:197:stop] 0/8216, RunningAvgSamplesPerSec=6.339055662711915, CurrSamplesPerSec=5.7142352089188195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:07:58,065] [INFO] [timer.py:197:stop] 0/8218, RunningAvgSamplesPerSec=6.339061990192898, CurrSamplesPerSec=5.724786977121834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:08:09,376] [INFO] [logging.py:68:log_dist] [Rank 0] step=4110, skipped=5, lr=[1.9911111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:08:09,379] [INFO] [timer.py:197:stop] 0/8220, RunningAvgSamplesPerSec=6.339059931735823, CurrSamplesPerSec=5.666828021862285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:08:20,617] [INFO] [timer.py:197:stop] 0/8222, RunningAvgSamplesPerSec=6.339068523169324, CurrSamplesPerSec=5.748742382423336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:08:31,901] [INFO] [timer.py:197:stop] 0/8224, RunningAvgSamplesPerSec=6.339071114500694, CurrSamplesPerSec=5.707910612487633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:08:43,166] [INFO] [timer.py:197:stop] 0/8226, RunningAvgSamplesPerSec=6.339076373811075, CurrSamplesPerSec=5.712450091825509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:08:54,414] [INFO] [timer.py:197:stop] 0/8228, RunningAvgSamplesPerSec=6.339083077897917, CurrSamplesPerSec=5.72001334948079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:09:05,654] [INFO] [timer.py:197:stop] 0/8230, RunningAvgSamplesPerSec=6.339091960303741, CurrSamplesPerSec=5.7456059304629505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:09:16,998] [INFO] [timer.py:197:stop] 0/8232, RunningAvgSamplesPerSec=6.3390833115626615, CurrSamplesPerSec=5.637741229429793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:09:28,254] [INFO] [timer.py:197:stop] 0/8234, RunningAvgSamplesPerSec=6.339088383880457, CurrSamplesPerSec=5.717522351409421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:09:39,502] [INFO] [timer.py:197:stop] 0/8236, RunningAvgSamplesPerSec=6.3390943973054705, CurrSamplesPerSec=5.717494342153484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:09:50,792] [INFO] [timer.py:197:stop] 0/8238, RunningAvgSamplesPerSec=6.3390989670440625, CurrSamplesPerSec=5.703746020494652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:02,085] [INFO] [logging.py:68:log_dist] [Rank 0] step=4120, skipped=5, lr=[1.968888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:10:02,086] [INFO] [timer.py:197:stop] 0/8240, RunningAvgSamplesPerSec=6.3390987022636445, CurrSamplesPerSec=5.698852295748182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:13,351] [INFO] [timer.py:197:stop] 0/8242, RunningAvgSamplesPerSec=6.339103412629639, CurrSamplesPerSec=5.716625460863346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:24,583] [INFO] [timer.py:197:stop] 0/8244, RunningAvgSamplesPerSec=6.339112582276825, CurrSamplesPerSec=5.723448943387044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:35,845] [INFO] [timer.py:197:stop] 0/8246, RunningAvgSamplesPerSec=6.339117191533311, CurrSamplesPerSec=5.7132306393035375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:47,096] [INFO] [timer.py:197:stop] 0/8248, RunningAvgSamplesPerSec=6.339120428724326, CurrSamplesPerSec=5.703754019298071, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:10:58,338] [INFO] [timer.py:197:stop] 0/8250, RunningAvgSamplesPerSec=6.339127459602533, CurrSamplesPerSec=5.727030398279421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.9577777777777777e-06, 'epoch': 17.48} +[2022-12-17 11:11:09,580] [INFO] [timer.py:197:stop] 0/8252, RunningAvgSamplesPerSec=6.339135984734979, CurrSamplesPerSec=5.71758251134771, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:11:20,882] [INFO] [timer.py:197:stop] 0/8254, RunningAvgSamplesPerSec=6.339141798644105, CurrSamplesPerSec=5.715823295197673, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:11:32,199] [INFO] [timer.py:197:stop] 0/8256, RunningAvgSamplesPerSec=6.339145620009977, CurrSamplesPerSec=5.708334228243922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:11:43,485] [INFO] [timer.py:197:stop] 0/8258, RunningAvgSamplesPerSec=6.339152792660803, CurrSamplesPerSec=5.72381237944853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:11:54,974] [INFO] [logging.py:68:log_dist] [Rank 0] step=4130, skipped=5, lr=[1.9466666666666665e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:11:54,976] [INFO] [timer.py:197:stop] 0/8260, RunningAvgSamplesPerSec=6.339150139457296, CurrSamplesPerSec=5.680538173020475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:12:06,285] [INFO] [timer.py:197:stop] 0/8262, RunningAvgSamplesPerSec=6.339155182167034, CurrSamplesPerSec=5.733205943686423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:12:17,576] [INFO] [timer.py:197:stop] 0/8264, RunningAvgSamplesPerSec=6.3391629960552365, CurrSamplesPerSec=5.724178547068688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:12:28,895] [INFO] [timer.py:197:stop] 0/8266, RunningAvgSamplesPerSec=6.339167342913138, CurrSamplesPerSec=5.713661369116393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:12:40,176] [INFO] [timer.py:197:stop] 0/8268, RunningAvgSamplesPerSec=6.339176790809956, CurrSamplesPerSec=5.727440970450266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:12:51,495] [INFO] [timer.py:197:stop] 0/8270, RunningAvgSamplesPerSec=6.339181293898531, CurrSamplesPerSec=5.7165577732425215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:02,808] [INFO] [timer.py:197:stop] 0/8272, RunningAvgSamplesPerSec=6.339186260828034, CurrSamplesPerSec=5.712118484501903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:14,095] [INFO] [timer.py:197:stop] 0/8274, RunningAvgSamplesPerSec=6.339192071129688, CurrSamplesPerSec=5.730050456633666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:25,408] [INFO] [timer.py:197:stop] 0/8276, RunningAvgSamplesPerSec=6.339196894368015, CurrSamplesPerSec=5.720743052793949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:36,739] [INFO] [timer.py:197:stop] 0/8278, RunningAvgSamplesPerSec=6.33919919016503, CurrSamplesPerSec=5.720215931365112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:48,081] [INFO] [logging.py:68:log_dist] [Rank 0] step=4140, skipped=5, lr=[1.9244444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:13:48,082] [INFO] [timer.py:197:stop] 0/8280, RunningAvgSamplesPerSec=6.33920117072856, CurrSamplesPerSec=5.712191901573292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:13:59,391] [INFO] [timer.py:197:stop] 0/8282, RunningAvgSamplesPerSec=6.339206953617565, CurrSamplesPerSec=5.726021813773496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:14:10,634] [INFO] [timer.py:197:stop] 0/8284, RunningAvgSamplesPerSec=6.3392097988073655, CurrSamplesPerSec=5.726001049656643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:14:21,913] [INFO] [timer.py:197:stop] 0/8286, RunningAvgSamplesPerSec=6.339212154446398, CurrSamplesPerSec=5.7209666576913625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:14:33,192] [INFO] [timer.py:197:stop] 0/8288, RunningAvgSamplesPerSec=6.339214232019025, CurrSamplesPerSec=5.711508126228083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:14:44,487] [INFO] [timer.py:197:stop] 0/8290, RunningAvgSamplesPerSec=6.339214198475859, CurrSamplesPerSec=5.6819615706288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:14:55,774] [INFO] [timer.py:197:stop] 0/8292, RunningAvgSamplesPerSec=6.339216904861013, CurrSamplesPerSec=5.723860222690163, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:15:07,076] [INFO] [timer.py:197:stop] 0/8294, RunningAvgSamplesPerSec=6.3392155917844235, CurrSamplesPerSec=5.684246653894753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:15:18,387] [INFO] [timer.py:197:stop] 0/8296, RunningAvgSamplesPerSec=6.33921313178604, CurrSamplesPerSec=5.666598102286572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:15:29,673] [INFO] [timer.py:197:stop] 0/8298, RunningAvgSamplesPerSec=6.339215239750418, CurrSamplesPerSec=5.6977292835724915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:15:40,954] [INFO] [logging.py:68:log_dist] [Rank 0] step=4150, skipped=5, lr=[1.9022222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:15:40,955] [INFO] [timer.py:197:stop] 0/8300, RunningAvgSamplesPerSec=6.339217975605579, CurrSamplesPerSec=5.693590156972587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.9022222222222222e-06, 'epoch': 17.58} +[2022-12-17 11:15:52,248] [INFO] [timer.py:197:stop] 0/8302, RunningAvgSamplesPerSec=6.339218437141733, CurrSamplesPerSec=5.710349266393622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:03,530] [INFO] [timer.py:197:stop] 0/8304, RunningAvgSamplesPerSec=6.339219993659909, CurrSamplesPerSec=5.706298049388331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:14,856] [INFO] [timer.py:197:stop] 0/8306, RunningAvgSamplesPerSec=6.339216303753219, CurrSamplesPerSec=5.689630932039807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:26,150] [INFO] [timer.py:197:stop] 0/8308, RunningAvgSamplesPerSec=6.339218495615838, CurrSamplesPerSec=5.706676780294752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:37,440] [INFO] [timer.py:197:stop] 0/8310, RunningAvgSamplesPerSec=6.339219412467867, CurrSamplesPerSec=5.721004699086555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:48,713] [INFO] [timer.py:197:stop] 0/8312, RunningAvgSamplesPerSec=6.33922233962562, CurrSamplesPerSec=5.711818272076358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:16:59,959] [INFO] [timer.py:197:stop] 0/8314, RunningAvgSamplesPerSec=6.339227335618701, CurrSamplesPerSec=5.726106581789273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:17:11,236] [INFO] [timer.py:197:stop] 0/8316, RunningAvgSamplesPerSec=6.339230585670635, CurrSamplesPerSec=5.717236669874342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:17:22,486] [INFO] [timer.py:197:stop] 0/8318, RunningAvgSamplesPerSec=6.339234875250686, CurrSamplesPerSec=5.723407208553411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:17:33,817] [INFO] [logging.py:68:log_dist] [Rank 0] step=4160, skipped=5, lr=[1.8800000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:17:33,818] [INFO] [timer.py:197:stop] 0/8320, RunningAvgSamplesPerSec=6.339230314931957, CurrSamplesPerSec=5.68304252094583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:17:45,121] [INFO] [timer.py:197:stop] 0/8322, RunningAvgSamplesPerSec=6.33922837184202, CurrSamplesPerSec=5.679600933362707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:17:56,405] [INFO] [timer.py:197:stop] 0/8324, RunningAvgSamplesPerSec=6.339230634670554, CurrSamplesPerSec=5.709936282634974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:18:07,697] [INFO] [timer.py:197:stop] 0/8326, RunningAvgSamplesPerSec=6.339231705867129, CurrSamplesPerSec=5.701697369069446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:18:19,024] [INFO] [timer.py:197:stop] 0/8328, RunningAvgSamplesPerSec=6.339227192482196, CurrSamplesPerSec=5.668277111901843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:18:30,339] [INFO] [timer.py:197:stop] 0/8330, RunningAvgSamplesPerSec=6.339224302880751, CurrSamplesPerSec=5.6813662973292764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:18:41,624] [INFO] [timer.py:197:stop] 0/8332, RunningAvgSamplesPerSec=6.33922615975293, CurrSamplesPerSec=5.70432708286264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:18:52,934] [INFO] [timer.py:197:stop] 0/8334, RunningAvgSamplesPerSec=6.339225213910152, CurrSamplesPerSec=5.689743810942323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:19:04,202] [INFO] [timer.py:197:stop] 0/8336, RunningAvgSamplesPerSec=6.339226439350858, CurrSamplesPerSec=5.698207513618382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:19:15,474] [INFO] [timer.py:197:stop] 0/8338, RunningAvgSamplesPerSec=6.339230754324665, CurrSamplesPerSec=5.721747827024824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:19:26,733] [INFO] [logging.py:68:log_dist] [Rank 0] step=4170, skipped=5, lr=[1.8577777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:19:26,735] [INFO] [timer.py:197:stop] 0/8340, RunningAvgSamplesPerSec=6.339235738048293, CurrSamplesPerSec=5.710465884387955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:19:37,967] [INFO] [timer.py:197:stop] 0/8342, RunningAvgSamplesPerSec=6.339242698324499, CurrSamplesPerSec=5.721313194470139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:19:49,272] [INFO] [timer.py:197:stop] 0/8344, RunningAvgSamplesPerSec=6.3392414822520005, CurrSamplesPerSec=5.701325595506997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:20:00,594] [INFO] [timer.py:197:stop] 0/8346, RunningAvgSamplesPerSec=6.339237516286787, CurrSamplesPerSec=5.672403475904154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:20:11,900] [INFO] [timer.py:197:stop] 0/8348, RunningAvgSamplesPerSec=6.339236875028155, CurrSamplesPerSec=5.685905548740478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:20:23,180] [INFO] [timer.py:197:stop] 0/8350, RunningAvgSamplesPerSec=6.339236993106727, CurrSamplesPerSec=5.7164289764054255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.8466666666666668e-06, 'epoch': 17.69} +[2022-12-17 11:20:34,486] [INFO] [timer.py:197:stop] 0/8352, RunningAvgSamplesPerSec=6.339234923905659, CurrSamplesPerSec=5.683368594923178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:20:45,748] [INFO] [timer.py:197:stop] 0/8354, RunningAvgSamplesPerSec=6.339237401598272, CurrSamplesPerSec=5.708726826885181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:20:57,054] [INFO] [timer.py:197:stop] 0/8356, RunningAvgSamplesPerSec=6.339235782079853, CurrSamplesPerSec=5.686971373779364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:21:08,516] [INFO] [timer.py:197:stop] 0/8358, RunningAvgSamplesPerSec=6.339236342324881, CurrSamplesPerSec=5.697629632182958, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:21:19,799] [INFO] [logging.py:68:log_dist] [Rank 0] step=4180, skipped=5, lr=[1.8355555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:21:19,801] [INFO] [timer.py:197:stop] 0/8360, RunningAvgSamplesPerSec=6.339237709966899, CurrSamplesPerSec=5.7126971205425034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:21:31,034] [INFO] [timer.py:197:stop] 0/8362, RunningAvgSamplesPerSec=6.339244578809194, CurrSamplesPerSec=5.731835822103387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:21:42,314] [INFO] [timer.py:197:stop] 0/8364, RunningAvgSamplesPerSec=6.3392467301292434, CurrSamplesPerSec=5.7207367131062075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:21:53,630] [INFO] [timer.py:197:stop] 0/8366, RunningAvgSamplesPerSec=6.339244331933694, CurrSamplesPerSec=5.684867813361724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:22:04,874] [INFO] [timer.py:197:stop] 0/8368, RunningAvgSamplesPerSec=6.339252480676972, CurrSamplesPerSec=5.736243797650853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:22:16,102] [INFO] [timer.py:197:stop] 0/8370, RunningAvgSamplesPerSec=6.339260552374997, CurrSamplesPerSec=5.731778054383289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:22:27,417] [INFO] [timer.py:197:stop] 0/8372, RunningAvgSamplesPerSec=6.339258189466109, CurrSamplesPerSec=5.665809435703773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:22:38,695] [INFO] [timer.py:197:stop] 0/8374, RunningAvgSamplesPerSec=6.339260729551199, CurrSamplesPerSec=5.712891646378365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:22:49,975] [INFO] [timer.py:197:stop] 0/8376, RunningAvgSamplesPerSec=6.339262395899968, CurrSamplesPerSec=5.718768919349549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:01,268] [INFO] [timer.py:197:stop] 0/8378, RunningAvgSamplesPerSec=6.3392628381798355, CurrSamplesPerSec=5.6697381999902845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:12,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=4190, skipped=5, lr=[1.8133333333333337e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:23:12,562] [INFO] [timer.py:197:stop] 0/8380, RunningAvgSamplesPerSec=6.339263209916691, CurrSamplesPerSec=5.7151639594689145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:23,807] [INFO] [timer.py:197:stop] 0/8382, RunningAvgSamplesPerSec=6.339268664321199, CurrSamplesPerSec=5.725624390293013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:35,113] [INFO] [timer.py:197:stop] 0/8384, RunningAvgSamplesPerSec=6.3392655904391715, CurrSamplesPerSec=5.6802217990221235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:46,412] [INFO] [timer.py:197:stop] 0/8386, RunningAvgSamplesPerSec=6.339265396750826, CurrSamplesPerSec=5.690623117243999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:23:57,717] [INFO] [timer.py:197:stop] 0/8388, RunningAvgSamplesPerSec=6.339263619268187, CurrSamplesPerSec=5.707007270753393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:24:09,014] [INFO] [timer.py:197:stop] 0/8390, RunningAvgSamplesPerSec=6.339262406010603, CurrSamplesPerSec=5.702546211715137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:24:20,315] [INFO] [timer.py:197:stop] 0/8392, RunningAvgSamplesPerSec=6.33926228919691, CurrSamplesPerSec=5.7045947459036475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:24:31,598] [INFO] [timer.py:197:stop] 0/8394, RunningAvgSamplesPerSec=6.339264393046411, CurrSamplesPerSec=5.722754174297952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:24:42,857] [INFO] [timer.py:197:stop] 0/8396, RunningAvgSamplesPerSec=6.339268603723656, CurrSamplesPerSec=5.736055522811934, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:24:54,138] [INFO] [timer.py:197:stop] 0/8398, RunningAvgSamplesPerSec=6.339271480524336, CurrSamplesPerSec=5.70896673439108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:25:05,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=4200, skipped=5, lr=[1.7911111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:25:05,441] [INFO] [timer.py:197:stop] 0/8400, RunningAvgSamplesPerSec=6.339271660929598, CurrSamplesPerSec=5.726014729528468, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.7911111111111113e-06, 'epoch': 17.8} +[2022-12-17 11:25:16,720] [INFO] [timer.py:197:stop] 0/8402, RunningAvgSamplesPerSec=6.339274754134736, CurrSamplesPerSec=5.722479924827972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:25:27,987] [INFO] [timer.py:197:stop] 0/8404, RunningAvgSamplesPerSec=6.339277088608752, CurrSamplesPerSec=5.712859062377203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:25:39,244] [INFO] [timer.py:197:stop] 0/8406, RunningAvgSamplesPerSec=6.3392782652594875, CurrSamplesPerSec=5.707171802301107, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:25:50,788] [INFO] [timer.py:197:stop] 0/8408, RunningAvgSamplesPerSec=6.339279890790081, CurrSamplesPerSec=5.708733625605707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:02,078] [INFO] [timer.py:197:stop] 0/8410, RunningAvgSamplesPerSec=6.339278027613123, CurrSamplesPerSec=5.705521335214915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:13,336] [INFO] [timer.py:197:stop] 0/8412, RunningAvgSamplesPerSec=6.339281095125638, CurrSamplesPerSec=5.702650396460303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:24,594] [INFO] [timer.py:197:stop] 0/8414, RunningAvgSamplesPerSec=6.339285152925692, CurrSamplesPerSec=5.705124084723759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:35,852] [INFO] [timer.py:197:stop] 0/8416, RunningAvgSamplesPerSec=6.339290260592174, CurrSamplesPerSec=5.736132253122681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:47,107] [INFO] [timer.py:197:stop] 0/8418, RunningAvgSamplesPerSec=6.339293490660097, CurrSamplesPerSec=5.707359641948332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:26:58,378] [INFO] [logging.py:68:log_dist] [Rank 0] step=4210, skipped=5, lr=[1.7688888888888891e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:26:58,380] [INFO] [timer.py:197:stop] 0/8420, RunningAvgSamplesPerSec=6.339294665055505, CurrSamplesPerSec=5.723917342709765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:27:09,669] [INFO] [timer.py:197:stop] 0/8422, RunningAvgSamplesPerSec=6.339296075010931, CurrSamplesPerSec=5.711223045375077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:27:20,912] [INFO] [timer.py:197:stop] 0/8424, RunningAvgSamplesPerSec=6.33930149368414, CurrSamplesPerSec=5.715657047447357, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:27:32,217] [INFO] [timer.py:197:stop] 0/8426, RunningAvgSamplesPerSec=6.339302209541907, CurrSamplesPerSec=5.707435121051169, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:27:43,494] [INFO] [timer.py:197:stop] 0/8428, RunningAvgSamplesPerSec=6.33930898862225, CurrSamplesPerSec=5.732467181689143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:27:54,732] [INFO] [timer.py:197:stop] 0/8430, RunningAvgSamplesPerSec=6.339317828331498, CurrSamplesPerSec=5.736133969159488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:28:05,946] [INFO] [timer.py:197:stop] 0/8432, RunningAvgSamplesPerSec=6.339327271371329, CurrSamplesPerSec=5.738240320136126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:28:17,192] [INFO] [timer.py:197:stop] 0/8434, RunningAvgSamplesPerSec=6.339334200311701, CurrSamplesPerSec=5.73747867698906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:28:28,434] [INFO] [timer.py:197:stop] 0/8436, RunningAvgSamplesPerSec=6.339342476771847, CurrSamplesPerSec=5.727931534738645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:28:39,663] [INFO] [timer.py:197:stop] 0/8438, RunningAvgSamplesPerSec=6.3393491244231255, CurrSamplesPerSec=5.7417042369934315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:28:50,891] [INFO] [logging.py:68:log_dist] [Rank 0] step=4220, skipped=5, lr=[1.7466666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:28:50,893] [INFO] [timer.py:197:stop] 0/8440, RunningAvgSamplesPerSec=6.339356976835649, CurrSamplesPerSec=5.740567711638473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:29:02,107] [INFO] [timer.py:197:stop] 0/8442, RunningAvgSamplesPerSec=6.339367479605312, CurrSamplesPerSec=5.755491501651248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:29:13,377] [INFO] [timer.py:197:stop] 0/8444, RunningAvgSamplesPerSec=6.33937230949232, CurrSamplesPerSec=5.708431098214918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:29:24,659] [INFO] [timer.py:197:stop] 0/8446, RunningAvgSamplesPerSec=6.339376247273105, CurrSamplesPerSec=5.726508225755047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:29:35,917] [INFO] [timer.py:197:stop] 0/8448, RunningAvgSamplesPerSec=6.339383441457143, CurrSamplesPerSec=5.730865185284245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:29:47,208] [INFO] [timer.py:197:stop] 0/8450, RunningAvgSamplesPerSec=6.3393828566553365, CurrSamplesPerSec=5.689957038867744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.7355555555555555e-06, 'epoch': 17.9} +[2022-12-17 11:29:58,637] [INFO] [timer.py:197:stop] 0/8452, RunningAvgSamplesPerSec=6.339384996625631, CurrSamplesPerSec=5.714973902369629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:30:09,875] [INFO] [timer.py:197:stop] 0/8454, RunningAvgSamplesPerSec=6.339388946893833, CurrSamplesPerSec=5.712536646700399, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:30:21,138] [INFO] [timer.py:197:stop] 0/8456, RunningAvgSamplesPerSec=6.339391369493708, CurrSamplesPerSec=5.715111881106288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:30:32,468] [INFO] [timer.py:197:stop] 0/8458, RunningAvgSamplesPerSec=6.339383985489258, CurrSamplesPerSec=5.647728124824058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:30:43,743] [INFO] [logging.py:68:log_dist] [Rank 0] step=4230, skipped=5, lr=[1.7244444444444448e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:30:43,745] [INFO] [timer.py:197:stop] 0/8460, RunningAvgSamplesPerSec=6.339386521467177, CurrSamplesPerSec=5.718073337086938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:30:55,006] [INFO] [timer.py:197:stop] 0/8462, RunningAvgSamplesPerSec=6.339391990345543, CurrSamplesPerSec=5.7277765591619945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:31:06,220] [INFO] [timer.py:197:stop] 0/8464, RunningAvgSamplesPerSec=6.339402510514149, CurrSamplesPerSec=5.751242181357707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:31:17,439] [INFO] [timer.py:197:stop] 0/8466, RunningAvgSamplesPerSec=6.3394110532341195, CurrSamplesPerSec=5.73423371982176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:31:28,698] [INFO] [timer.py:197:stop] 0/8468, RunningAvgSamplesPerSec=6.3394181096671325, CurrSamplesPerSec=5.723654941774408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:31:40,004] [INFO] [timer.py:197:stop] 0/8470, RunningAvgSamplesPerSec=6.339416508958474, CurrSamplesPerSec=5.6938954612453685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:31:51,293] [INFO] [timer.py:197:stop] 0/8472, RunningAvgSamplesPerSec=6.3394182299776185, CurrSamplesPerSec=5.708501021440033, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:02,565] [INFO] [timer.py:197:stop] 0/8474, RunningAvgSamplesPerSec=6.339421522948309, CurrSamplesPerSec=5.715124292218715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:13,874] [INFO] [timer.py:197:stop] 0/8476, RunningAvgSamplesPerSec=6.339421159949795, CurrSamplesPerSec=5.7121568945223995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:25,178] [INFO] [timer.py:197:stop] 0/8478, RunningAvgSamplesPerSec=6.339421078885325, CurrSamplesPerSec=5.707427354620473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:36,476] [INFO] [logging.py:68:log_dist] [Rank 0] step=4240, skipped=5, lr=[1.7022222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:32:36,477] [INFO] [timer.py:197:stop] 0/8480, RunningAvgSamplesPerSec=6.3394213048783445, CurrSamplesPerSec=5.707564969195127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:47,776] [INFO] [timer.py:197:stop] 0/8482, RunningAvgSamplesPerSec=6.3394211054246545, CurrSamplesPerSec=5.691500040539161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:32:59,061] [INFO] [timer.py:197:stop] 0/8484, RunningAvgSamplesPerSec=6.339421151126853, CurrSamplesPerSec=5.707390464363309, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:33:10,407] [INFO] [timer.py:197:stop] 0/8486, RunningAvgSamplesPerSec=6.339413566362518, CurrSamplesPerSec=5.662013657402974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:33:21,713] [INFO] [timer.py:197:stop] 0/8488, RunningAvgSamplesPerSec=6.339409833852926, CurrSamplesPerSec=5.668538529567534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:33:33,028] [INFO] [timer.py:197:stop] 0/8490, RunningAvgSamplesPerSec=6.339406983036536, CurrSamplesPerSec=5.700460894864342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:33:44,288] [INFO] [timer.py:197:stop] 0/8492, RunningAvgSamplesPerSec=6.339413427087992, CurrSamplesPerSec=5.715729825287001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:33:55,568] [INFO] [timer.py:197:stop] 0/8494, RunningAvgSamplesPerSec=6.339413990957854, CurrSamplesPerSec=5.6974368698493985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:34:04,037] [INFO] [timer.py:197:stop] 0/8496, RunningAvgSamplesPerSec=6.339782427831466, CurrSamplesPerSec=10.235213956730592, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:34:15,340] [INFO] [timer.py:197:stop] 0/8498, RunningAvgSamplesPerSec=6.339781535666853, CurrSamplesPerSec=5.692628565900289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:34:26,644] [INFO] [logging.py:68:log_dist] [Rank 0] step=4250, skipped=5, lr=[1.6800000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:34:26,646] [INFO] [timer.py:197:stop] 0/8500, RunningAvgSamplesPerSec=6.339783631474415, CurrSamplesPerSec=5.701057270515074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.6800000000000002e-06, 'epoch': 18.01} +[2022-12-17 11:34:37,949] [INFO] [timer.py:197:stop] 0/8502, RunningAvgSamplesPerSec=6.339780847211561, CurrSamplesPerSec=5.690072825431826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:34:49,278] [INFO] [timer.py:197:stop] 0/8504, RunningAvgSamplesPerSec=6.339777636169857, CurrSamplesPerSec=5.674001973632397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:00,542] [INFO] [timer.py:197:stop] 0/8506, RunningAvgSamplesPerSec=6.339781747344228, CurrSamplesPerSec=5.719508054321843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:11,842] [INFO] [timer.py:197:stop] 0/8508, RunningAvgSamplesPerSec=6.339781047907795, CurrSamplesPerSec=5.708254598248733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:23,139] [INFO] [timer.py:197:stop] 0/8510, RunningAvgSamplesPerSec=6.339781943068733, CurrSamplesPerSec=5.712835232522119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:34,423] [INFO] [timer.py:197:stop] 0/8512, RunningAvgSamplesPerSec=6.3397839002766805, CurrSamplesPerSec=5.713114881098626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:45,681] [INFO] [timer.py:197:stop] 0/8514, RunningAvgSamplesPerSec=6.3397893828611664, CurrSamplesPerSec=5.719634065067116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:35:56,965] [INFO] [timer.py:197:stop] 0/8516, RunningAvgSamplesPerSec=6.339792945786711, CurrSamplesPerSec=5.705632662583526, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:36:08,255] [INFO] [timer.py:197:stop] 0/8518, RunningAvgSamplesPerSec=6.339795762011441, CurrSamplesPerSec=5.709598894951428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:36:19,518] [INFO] [logging.py:68:log_dist] [Rank 0] step=4260, skipped=5, lr=[1.6577777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:36:19,520] [INFO] [timer.py:197:stop] 0/8520, RunningAvgSamplesPerSec=6.339800319404483, CurrSamplesPerSec=5.715123318796184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:36:30,824] [INFO] [timer.py:197:stop] 0/8522, RunningAvgSamplesPerSec=6.339802019617752, CurrSamplesPerSec=5.689481157333476, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:36:42,124] [INFO] [timer.py:197:stop] 0/8524, RunningAvgSamplesPerSec=6.33980181371531, CurrSamplesPerSec=5.68836207618135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:36:53,440] [INFO] [timer.py:197:stop] 0/8526, RunningAvgSamplesPerSec=6.339799072181414, CurrSamplesPerSec=5.693480023593855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:37:04,800] [INFO] [timer.py:197:stop] 0/8528, RunningAvgSamplesPerSec=6.339796757419858, CurrSamplesPerSec=5.685700331747718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:37:16,230] [INFO] [timer.py:197:stop] 0/8530, RunningAvgSamplesPerSec=6.339793128090444, CurrSamplesPerSec=5.679584830639335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:37:27,507] [INFO] [timer.py:197:stop] 0/8532, RunningAvgSamplesPerSec=6.3397911491863415, CurrSamplesPerSec=5.702511080508184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:37:38,789] [INFO] [timer.py:197:stop] 0/8534, RunningAvgSamplesPerSec=6.339793107232789, CurrSamplesPerSec=5.71491842073463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:37:50,118] [INFO] [timer.py:197:stop] 0/8536, RunningAvgSamplesPerSec=6.339791267095922, CurrSamplesPerSec=5.678680580952989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:01,439] [INFO] [timer.py:197:stop] 0/8538, RunningAvgSamplesPerSec=6.339787941149314, CurrSamplesPerSec=5.680405705150381, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:12,731] [INFO] [logging.py:68:log_dist] [Rank 0] step=4270, skipped=5, lr=[1.6355555555555559e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:38:12,733] [INFO] [timer.py:197:stop] 0/8540, RunningAvgSamplesPerSec=6.339789678550872, CurrSamplesPerSec=5.7091913624282995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:24,019] [INFO] [timer.py:197:stop] 0/8542, RunningAvgSamplesPerSec=6.339791034186154, CurrSamplesPerSec=5.71031428192412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:35,314] [INFO] [timer.py:197:stop] 0/8544, RunningAvgSamplesPerSec=6.33978949614059, CurrSamplesPerSec=5.704379207300573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:46,622] [INFO] [timer.py:197:stop] 0/8546, RunningAvgSamplesPerSec=6.339787753883054, CurrSamplesPerSec=5.6868624601278395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:38:57,914] [INFO] [timer.py:197:stop] 0/8548, RunningAvgSamplesPerSec=6.339788021027447, CurrSamplesPerSec=5.695277469135737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:39:09,212] [INFO] [timer.py:197:stop] 0/8550, RunningAvgSamplesPerSec=6.339785911981671, CurrSamplesPerSec=5.690164493137114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.6244444444444447e-06, 'epoch': 18.11} +[2022-12-17 11:39:20,568] [INFO] [timer.py:197:stop] 0/8552, RunningAvgSamplesPerSec=6.339775247921724, CurrSamplesPerSec=5.640723061991974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:39:31,886] [INFO] [timer.py:197:stop] 0/8554, RunningAvgSamplesPerSec=6.339772199800058, CurrSamplesPerSec=5.696801112656467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:39:43,155] [INFO] [timer.py:197:stop] 0/8556, RunningAvgSamplesPerSec=6.339773800987133, CurrSamplesPerSec=5.713415959472967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:39:54,427] [INFO] [timer.py:197:stop] 0/8558, RunningAvgSamplesPerSec=6.339776201349781, CurrSamplesPerSec=5.694955826642644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:40:05,685] [INFO] [logging.py:68:log_dist] [Rank 0] step=4280, skipped=5, lr=[1.6133333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:40:05,687] [INFO] [timer.py:197:stop] 0/8560, RunningAvgSamplesPerSec=6.33978132883249, CurrSamplesPerSec=5.7272786896522385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:40:16,991] [INFO] [timer.py:197:stop] 0/8562, RunningAvgSamplesPerSec=6.33977990750635, CurrSamplesPerSec=5.680848811566373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:40:28,248] [INFO] [timer.py:197:stop] 0/8564, RunningAvgSamplesPerSec=6.339785807700449, CurrSamplesPerSec=5.744079671759363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:40:39,502] [INFO] [timer.py:197:stop] 0/8566, RunningAvgSamplesPerSec=6.339791741271043, CurrSamplesPerSec=5.73388757579416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:40:50,820] [INFO] [timer.py:197:stop] 0/8568, RunningAvgSamplesPerSec=6.339788840323985, CurrSamplesPerSec=5.684582496517594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:02,131] [INFO] [timer.py:197:stop] 0/8570, RunningAvgSamplesPerSec=6.33978424473613, CurrSamplesPerSec=5.664017154219129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:13,415] [INFO] [timer.py:197:stop] 0/8572, RunningAvgSamplesPerSec=6.339786092626063, CurrSamplesPerSec=5.713152088581492, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:24,640] [INFO] [timer.py:197:stop] 0/8574, RunningAvgSamplesPerSec=6.339793841673661, CurrSamplesPerSec=5.73153867344533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:35,929] [INFO] [timer.py:197:stop] 0/8576, RunningAvgSamplesPerSec=6.339795662229369, CurrSamplesPerSec=5.677744194300416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:47,173] [INFO] [timer.py:197:stop] 0/8578, RunningAvgSamplesPerSec=6.3398015179993505, CurrSamplesPerSec=5.725435590723989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:41:58,430] [INFO] [logging.py:68:log_dist] [Rank 0] step=4290, skipped=5, lr=[1.5911111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:41:58,432] [INFO] [timer.py:197:stop] 0/8580, RunningAvgSamplesPerSec=6.339807066560084, CurrSamplesPerSec=5.726189397922931, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:42:09,769] [INFO] [timer.py:197:stop] 0/8582, RunningAvgSamplesPerSec=6.339806762657493, CurrSamplesPerSec=5.7034345693817015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:42:21,079] [INFO] [timer.py:197:stop] 0/8584, RunningAvgSamplesPerSec=6.339805077050516, CurrSamplesPerSec=5.69950980922965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:42:32,387] [INFO] [timer.py:197:stop] 0/8586, RunningAvgSamplesPerSec=6.339804042820197, CurrSamplesPerSec=5.706086263438248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:42:43,713] [INFO] [timer.py:197:stop] 0/8588, RunningAvgSamplesPerSec=6.339797184392537, CurrSamplesPerSec=5.683306264975196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:42:55,069] [INFO] [timer.py:197:stop] 0/8590, RunningAvgSamplesPerSec=6.339789078503823, CurrSamplesPerSec=5.658868508175669, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:43:06,329] [INFO] [timer.py:197:stop] 0/8592, RunningAvgSamplesPerSec=6.339791742272315, CurrSamplesPerSec=5.724263992921275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:43:17,617] [INFO] [timer.py:197:stop] 0/8594, RunningAvgSamplesPerSec=6.339790736510714, CurrSamplesPerSec=5.696077503953731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:43:28,921] [INFO] [timer.py:197:stop] 0/8596, RunningAvgSamplesPerSec=6.339790044616391, CurrSamplesPerSec=5.703937997968867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:43:40,242] [INFO] [timer.py:197:stop] 0/8598, RunningAvgSamplesPerSec=6.33978624497434, CurrSamplesPerSec=5.700390684260625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:43:51,546] [INFO] [logging.py:68:log_dist] [Rank 0] step=4300, skipped=5, lr=[1.568888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:43:51,548] [INFO] [timer.py:197:stop] 0/8600, RunningAvgSamplesPerSec=6.33978249579551, CurrSamplesPerSec=5.683888465500305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.568888888888889e-06, 'epoch': 18.22} +[2022-12-17 11:44:02,891] [INFO] [timer.py:197:stop] 0/8602, RunningAvgSamplesPerSec=6.339775170207083, CurrSamplesPerSec=5.671931724371231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:44:14,204] [INFO] [timer.py:197:stop] 0/8604, RunningAvgSamplesPerSec=6.33977109989562, CurrSamplesPerSec=5.672009864781847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:44:25,515] [INFO] [timer.py:197:stop] 0/8606, RunningAvgSamplesPerSec=6.339769934462846, CurrSamplesPerSec=5.690476909212335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:44:36,821] [INFO] [timer.py:197:stop] 0/8608, RunningAvgSamplesPerSec=6.339769412472456, CurrSamplesPerSec=5.697288618609522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:44:48,111] [INFO] [timer.py:197:stop] 0/8610, RunningAvgSamplesPerSec=6.339769140575158, CurrSamplesPerSec=5.71895679212821, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:44:59,394] [INFO] [timer.py:197:stop] 0/8612, RunningAvgSamplesPerSec=6.33977065950526, CurrSamplesPerSec=5.725594836110285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:45:10,690] [INFO] [timer.py:197:stop] 0/8614, RunningAvgSamplesPerSec=6.339771673358445, CurrSamplesPerSec=5.722855926560641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:45:21,980] [INFO] [timer.py:197:stop] 0/8616, RunningAvgSamplesPerSec=6.339772835879714, CurrSamplesPerSec=5.707369349759655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:45:33,267] [INFO] [timer.py:197:stop] 0/8618, RunningAvgSamplesPerSec=6.339774552535556, CurrSamplesPerSec=5.7088997136654624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:45:44,531] [INFO] [logging.py:68:log_dist] [Rank 0] step=4310, skipped=5, lr=[1.546666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:45:44,533] [INFO] [timer.py:197:stop] 0/8620, RunningAvgSamplesPerSec=6.339776481679606, CurrSamplesPerSec=5.712077400937806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:45:55,799] [INFO] [timer.py:197:stop] 0/8622, RunningAvgSamplesPerSec=6.339781219744546, CurrSamplesPerSec=5.729563931908303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:46:07,095] [INFO] [timer.py:197:stop] 0/8624, RunningAvgSamplesPerSec=6.339781527658315, CurrSamplesPerSec=5.721834663889172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:46:18,379] [INFO] [timer.py:197:stop] 0/8626, RunningAvgSamplesPerSec=6.339784671445439, CurrSamplesPerSec=5.710062114887612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:46:29,903] [INFO] [timer.py:197:stop] 0/8628, RunningAvgSamplesPerSec=6.339790082020202, CurrSamplesPerSec=5.723102879952324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:46:41,210] [INFO] [timer.py:197:stop] 0/8630, RunningAvgSamplesPerSec=6.339793936597643, CurrSamplesPerSec=5.722113730534273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:46:52,497] [INFO] [timer.py:197:stop] 0/8632, RunningAvgSamplesPerSec=6.339795313337186, CurrSamplesPerSec=5.7167929824432875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:47:03,759] [INFO] [timer.py:197:stop] 0/8634, RunningAvgSamplesPerSec=6.339801308717311, CurrSamplesPerSec=5.731501715593646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:47:15,037] [INFO] [timer.py:197:stop] 0/8636, RunningAvgSamplesPerSec=6.33979993801347, CurrSamplesPerSec=5.697033006125454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:47:26,315] [INFO] [timer.py:197:stop] 0/8638, RunningAvgSamplesPerSec=6.339800368113021, CurrSamplesPerSec=5.720129631190722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:47:37,577] [INFO] [logging.py:68:log_dist] [Rank 0] step=4320, skipped=5, lr=[1.5244444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:47:37,578] [INFO] [timer.py:197:stop] 0/8640, RunningAvgSamplesPerSec=6.339805782505671, CurrSamplesPerSec=5.728831976728478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:47:48,858] [INFO] [timer.py:197:stop] 0/8642, RunningAvgSamplesPerSec=6.339809453462374, CurrSamplesPerSec=5.700420705134696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:48:00,106] [INFO] [timer.py:197:stop] 0/8644, RunningAvgSamplesPerSec=6.339812171595236, CurrSamplesPerSec=5.729238405192412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:48:11,360] [INFO] [timer.py:197:stop] 0/8646, RunningAvgSamplesPerSec=6.339814153430132, CurrSamplesPerSec=5.732832008862419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:48:22,610] [INFO] [timer.py:197:stop] 0/8648, RunningAvgSamplesPerSec=6.339821137286618, CurrSamplesPerSec=5.734705356259326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:48:33,932] [INFO] [timer.py:197:stop] 0/8650, RunningAvgSamplesPerSec=6.339816322733475, CurrSamplesPerSec=5.6507754896516476, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.5133333333333334e-06, 'epoch': 18.33} +[2022-12-17 11:48:45,234] [INFO] [timer.py:197:stop] 0/8652, RunningAvgSamplesPerSec=6.339816083819372, CurrSamplesPerSec=5.699778230623335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:48:56,531] [INFO] [timer.py:197:stop] 0/8654, RunningAvgSamplesPerSec=6.339819380427446, CurrSamplesPerSec=5.698967960659726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:49:07,829] [INFO] [timer.py:197:stop] 0/8656, RunningAvgSamplesPerSec=6.339818811451719, CurrSamplesPerSec=5.692353092011138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:49:19,089] [INFO] [timer.py:197:stop] 0/8658, RunningAvgSamplesPerSec=6.339821531302611, CurrSamplesPerSec=5.7240638095078475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:49:30,321] [INFO] [logging.py:68:log_dist] [Rank 0] step=4330, skipped=5, lr=[1.5022222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:49:30,323] [INFO] [timer.py:197:stop] 0/8660, RunningAvgSamplesPerSec=6.339830124316635, CurrSamplesPerSec=5.733025949322644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:49:41,649] [INFO] [timer.py:197:stop] 0/8662, RunningAvgSamplesPerSec=6.33982594014271, CurrSamplesPerSec=5.679973965287332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:49:52,927] [INFO] [timer.py:197:stop] 0/8664, RunningAvgSamplesPerSec=6.339828757213136, CurrSamplesPerSec=5.725590195398564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:50:04,238] [INFO] [timer.py:197:stop] 0/8666, RunningAvgSamplesPerSec=6.339825051306159, CurrSamplesPerSec=5.679188299844723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:50:15,784] [INFO] [timer.py:197:stop] 0/8668, RunningAvgSamplesPerSec=6.339826278212767, CurrSamplesPerSec=5.722158373965453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:50:27,046] [INFO] [timer.py:197:stop] 0/8670, RunningAvgSamplesPerSec=6.339826491115368, CurrSamplesPerSec=5.697858448570247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:50:38,358] [INFO] [timer.py:197:stop] 0/8672, RunningAvgSamplesPerSec=6.339822180424926, CurrSamplesPerSec=5.675786419696285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:50:49,661] [INFO] [timer.py:197:stop] 0/8674, RunningAvgSamplesPerSec=6.339819656595667, CurrSamplesPerSec=5.699933147171436, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:00,951] [INFO] [timer.py:197:stop] 0/8676, RunningAvgSamplesPerSec=6.339821439488037, CurrSamplesPerSec=5.710637661650436, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:12,282] [INFO] [timer.py:197:stop] 0/8678, RunningAvgSamplesPerSec=6.339816397132015, CurrSamplesPerSec=5.663841716955458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:23,590] [INFO] [logging.py:68:log_dist] [Rank 0] step=4340, skipped=5, lr=[1.48e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:51:23,592] [INFO] [timer.py:197:stop] 0/8680, RunningAvgSamplesPerSec=6.339814517342316, CurrSamplesPerSec=5.6952871358887025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:34,886] [INFO] [timer.py:197:stop] 0/8682, RunningAvgSamplesPerSec=6.3398151117840555, CurrSamplesPerSec=5.701887997509513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:46,203] [INFO] [timer.py:197:stop] 0/8684, RunningAvgSamplesPerSec=6.339813200596462, CurrSamplesPerSec=5.681629645282856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:51:57,565] [INFO] [timer.py:197:stop] 0/8686, RunningAvgSamplesPerSec=6.3398039213915185, CurrSamplesPerSec=5.721123459773922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:52:08,836] [INFO] [timer.py:197:stop] 0/8688, RunningAvgSamplesPerSec=6.339807339755771, CurrSamplesPerSec=5.733363662323854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:52:20,122] [INFO] [timer.py:197:stop] 0/8690, RunningAvgSamplesPerSec=6.3398047820889225, CurrSamplesPerSec=5.693373275569488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:52:31,419] [INFO] [timer.py:197:stop] 0/8692, RunningAvgSamplesPerSec=6.339805487152986, CurrSamplesPerSec=5.695563860606969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:52:42,699] [INFO] [timer.py:197:stop] 0/8694, RunningAvgSamplesPerSec=6.339805809847566, CurrSamplesPerSec=5.699512229507981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:52:53,996] [INFO] [timer.py:197:stop] 0/8696, RunningAvgSamplesPerSec=6.339806687332358, CurrSamplesPerSec=5.70979588176447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:53:05,318] [INFO] [timer.py:197:stop] 0/8698, RunningAvgSamplesPerSec=6.339802544669305, CurrSamplesPerSec=5.6829250953489625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:53:16,607] [INFO] [logging.py:68:log_dist] [Rank 0] step=4350, skipped=5, lr=[1.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:53:16,609] [INFO] [timer.py:197:stop] 0/8700, RunningAvgSamplesPerSec=6.339803640882542, CurrSamplesPerSec=5.697693969817248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.457777777777778e-06, 'epoch': 18.43} +[2022-12-17 11:53:27,908] [INFO] [timer.py:197:stop] 0/8702, RunningAvgSamplesPerSec=6.339803013859382, CurrSamplesPerSec=5.698081961389258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:53:39,182] [INFO] [timer.py:197:stop] 0/8704, RunningAvgSamplesPerSec=6.339804079590559, CurrSamplesPerSec=5.723818481858318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:53:50,469] [INFO] [timer.py:197:stop] 0/8706, RunningAvgSamplesPerSec=6.339801296690999, CurrSamplesPerSec=5.673336183153034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:01,762] [INFO] [timer.py:197:stop] 0/8708, RunningAvgSamplesPerSec=6.339801617730664, CurrSamplesPerSec=5.713631451847175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:13,066] [INFO] [timer.py:197:stop] 0/8710, RunningAvgSamplesPerSec=6.339801457413741, CurrSamplesPerSec=5.6995541006484745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:24,345] [INFO] [timer.py:197:stop] 0/8712, RunningAvgSamplesPerSec=6.339804380768808, CurrSamplesPerSec=5.7226387618425285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:35,647] [INFO] [timer.py:197:stop] 0/8714, RunningAvgSamplesPerSec=6.339801671989652, CurrSamplesPerSec=5.700162390700976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:46,960] [INFO] [timer.py:197:stop] 0/8716, RunningAvgSamplesPerSec=6.339799675445553, CurrSamplesPerSec=5.690776571377547, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:54:58,263] [INFO] [timer.py:197:stop] 0/8718, RunningAvgSamplesPerSec=6.33979969029298, CurrSamplesPerSec=5.70129169023307, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:55:09,614] [INFO] [logging.py:68:log_dist] [Rank 0] step=4360, skipped=5, lr=[1.4355555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:55:09,616] [INFO] [timer.py:197:stop] 0/8720, RunningAvgSamplesPerSec=6.339792131215849, CurrSamplesPerSec=5.65222399744496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:55:20,926] [INFO] [timer.py:197:stop] 0/8722, RunningAvgSamplesPerSec=6.339789881088182, CurrSamplesPerSec=5.686427569989322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:55:32,475] [INFO] [timer.py:197:stop] 0/8724, RunningAvgSamplesPerSec=6.339788342912373, CurrSamplesPerSec=5.698039628069896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:55:43,764] [INFO] [timer.py:197:stop] 0/8726, RunningAvgSamplesPerSec=6.339789115483264, CurrSamplesPerSec=5.707825411412025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:55:55,085] [INFO] [timer.py:197:stop] 0/8728, RunningAvgSamplesPerSec=6.339784990563931, CurrSamplesPerSec=5.674094083754208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:56:06,372] [INFO] [timer.py:197:stop] 0/8730, RunningAvgSamplesPerSec=6.339786656439554, CurrSamplesPerSec=5.715914333934802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:56:17,672] [INFO] [timer.py:197:stop] 0/8732, RunningAvgSamplesPerSec=6.339786147497288, CurrSamplesPerSec=5.721630747784866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:56:28,943] [INFO] [timer.py:197:stop] 0/8734, RunningAvgSamplesPerSec=6.339789929077884, CurrSamplesPerSec=5.724208574891888, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:56:40,244] [INFO] [timer.py:197:stop] 0/8736, RunningAvgSamplesPerSec=6.339790209997535, CurrSamplesPerSec=5.710170218452381, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:56:51,537] [INFO] [timer.py:197:stop] 0/8738, RunningAvgSamplesPerSec=6.339788493527141, CurrSamplesPerSec=5.702072582554532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:02,815] [INFO] [logging.py:68:log_dist] [Rank 0] step=4370, skipped=5, lr=[1.4133333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:57:02,817] [INFO] [timer.py:197:stop] 0/8740, RunningAvgSamplesPerSec=6.339789347590431, CurrSamplesPerSec=5.720313936415729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:14,137] [INFO] [timer.py:197:stop] 0/8742, RunningAvgSamplesPerSec=6.339785628082903, CurrSamplesPerSec=5.702052718498164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:25,449] [INFO] [timer.py:197:stop] 0/8744, RunningAvgSamplesPerSec=6.339784106733126, CurrSamplesPerSec=5.686165463352801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:36,809] [INFO] [timer.py:197:stop] 0/8746, RunningAvgSamplesPerSec=6.339777927706155, CurrSamplesPerSec=5.664655416875177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:48,124] [INFO] [timer.py:197:stop] 0/8748, RunningAvgSamplesPerSec=6.339776119560729, CurrSamplesPerSec=5.702787296157063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:57:59,440] [INFO] [timer.py:197:stop] 0/8750, RunningAvgSamplesPerSec=6.339774011859624, CurrSamplesPerSec=5.697596980180033, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.4022222222222223e-06, 'epoch': 18.54} +[2022-12-17 11:58:10,752] [INFO] [timer.py:197:stop] 0/8752, RunningAvgSamplesPerSec=6.339774009482486, CurrSamplesPerSec=5.704056293554354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:58:22,086] [INFO] [timer.py:197:stop] 0/8754, RunningAvgSamplesPerSec=6.339770577390216, CurrSamplesPerSec=5.692566032678308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:58:33,408] [INFO] [timer.py:197:stop] 0/8756, RunningAvgSamplesPerSec=6.339767354342624, CurrSamplesPerSec=5.698916660998225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:58:44,671] [INFO] [timer.py:197:stop] 0/8758, RunningAvgSamplesPerSec=6.339769632773332, CurrSamplesPerSec=5.714297002697882, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:58:55,958] [INFO] [logging.py:68:log_dist] [Rank 0] step=4380, skipped=5, lr=[1.3911111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 11:58:55,960] [INFO] [timer.py:197:stop] 0/8760, RunningAvgSamplesPerSec=6.339770445968811, CurrSamplesPerSec=5.699182848523754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:59:07,275] [INFO] [timer.py:197:stop] 0/8762, RunningAvgSamplesPerSec=6.339767552717385, CurrSamplesPerSec=5.676156550218765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:59:18,621] [INFO] [timer.py:197:stop] 0/8764, RunningAvgSamplesPerSec=6.33976088010469, CurrSamplesPerSec=5.650065903817682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:59:29,908] [INFO] [timer.py:197:stop] 0/8766, RunningAvgSamplesPerSec=6.339760982757028, CurrSamplesPerSec=5.719902922799692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:59:41,162] [INFO] [timer.py:197:stop] 0/8768, RunningAvgSamplesPerSec=6.339764568109337, CurrSamplesPerSec=5.714843230238036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 11:59:52,461] [INFO] [timer.py:197:stop] 0/8770, RunningAvgSamplesPerSec=6.339763916685576, CurrSamplesPerSec=5.708102870413749, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:00:03,710] [INFO] [timer.py:197:stop] 0/8772, RunningAvgSamplesPerSec=6.339768770506873, CurrSamplesPerSec=5.707572250574403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:00:15,199] [INFO] [timer.py:197:stop] 0/8774, RunningAvgSamplesPerSec=6.339768808350476, CurrSamplesPerSec=5.703915696891798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:00:26,473] [INFO] [timer.py:197:stop] 0/8776, RunningAvgSamplesPerSec=6.3397699540445895, CurrSamplesPerSec=5.71347676267275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:00:37,762] [INFO] [timer.py:197:stop] 0/8778, RunningAvgSamplesPerSec=6.339771212196474, CurrSamplesPerSec=5.716667827312139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:00:49,067] [INFO] [logging.py:68:log_dist] [Rank 0] step=4390, skipped=5, lr=[1.3688888888888891e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:00:49,069] [INFO] [timer.py:197:stop] 0/8780, RunningAvgSamplesPerSec=6.33977060376882, CurrSamplesPerSec=5.700833039940614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:00,359] [INFO] [timer.py:197:stop] 0/8782, RunningAvgSamplesPerSec=6.3397703540823045, CurrSamplesPerSec=5.686433592944195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:11,689] [INFO] [timer.py:197:stop] 0/8784, RunningAvgSamplesPerSec=6.339767195511378, CurrSamplesPerSec=5.661934120243849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:22,944] [INFO] [timer.py:197:stop] 0/8786, RunningAvgSamplesPerSec=6.339773632242905, CurrSamplesPerSec=5.7119300884010835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:34,220] [INFO] [timer.py:197:stop] 0/8788, RunningAvgSamplesPerSec=6.3397773377908, CurrSamplesPerSec=5.731191631071715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:45,510] [INFO] [timer.py:197:stop] 0/8790, RunningAvgSamplesPerSec=6.339781630455612, CurrSamplesPerSec=5.71911055970979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:01:56,814] [INFO] [timer.py:197:stop] 0/8792, RunningAvgSamplesPerSec=6.339781805486685, CurrSamplesPerSec=5.695523739898146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:02:08,149] [INFO] [timer.py:197:stop] 0/8794, RunningAvgSamplesPerSec=6.339777855304927, CurrSamplesPerSec=5.6880899077540805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:02:19,448] [INFO] [timer.py:197:stop] 0/8796, RunningAvgSamplesPerSec=6.339777923984906, CurrSamplesPerSec=5.70172716149224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:02:30,805] [INFO] [timer.py:197:stop] 0/8798, RunningAvgSamplesPerSec=6.339769911695078, CurrSamplesPerSec=5.64180165759628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:02:42,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=4400, skipped=5, lr=[1.3466666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:02:42,120] [INFO] [timer.py:197:stop] 0/8800, RunningAvgSamplesPerSec=6.339769086325866, CurrSamplesPerSec=5.704153745652911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.3466666666666668e-06, 'epoch': 18.64} +[2022-12-17 12:02:53,392] [INFO] [timer.py:197:stop] 0/8802, RunningAvgSamplesPerSec=6.339773288566415, CurrSamplesPerSec=5.71960189158303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:03:04,704] [INFO] [timer.py:197:stop] 0/8804, RunningAvgSamplesPerSec=6.339771021610924, CurrSamplesPerSec=5.698482102803512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:03:15,993] [INFO] [timer.py:197:stop] 0/8806, RunningAvgSamplesPerSec=6.339771096186967, CurrSamplesPerSec=5.7159873617558965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:03:27,289] [INFO] [timer.py:197:stop] 0/8808, RunningAvgSamplesPerSec=6.339765525460466, CurrSamplesPerSec=5.687666157923678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:03:38,637] [INFO] [timer.py:197:stop] 0/8810, RunningAvgSamplesPerSec=6.339758676860227, CurrSamplesPerSec=5.67841582462765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:03:49,898] [INFO] [timer.py:197:stop] 0/8812, RunningAvgSamplesPerSec=6.339757272024986, CurrSamplesPerSec=5.699347655263886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:01,192] [INFO] [timer.py:197:stop] 0/8814, RunningAvgSamplesPerSec=6.339757954583949, CurrSamplesPerSec=5.717018957337291, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:12,542] [INFO] [timer.py:197:stop] 0/8816, RunningAvgSamplesPerSec=6.339752702146053, CurrSamplesPerSec=5.670965693668193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:23,824] [INFO] [timer.py:197:stop] 0/8818, RunningAvgSamplesPerSec=6.339757232182007, CurrSamplesPerSec=5.718000012354702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:35,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=4410, skipped=5, lr=[1.3244444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:04:35,120] [INFO] [timer.py:197:stop] 0/8820, RunningAvgSamplesPerSec=6.339758213641389, CurrSamplesPerSec=5.712680586457322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:46,597] [INFO] [timer.py:197:stop] 0/8822, RunningAvgSamplesPerSec=6.3397591900724315, CurrSamplesPerSec=5.717291709450172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:04:57,854] [INFO] [timer.py:197:stop] 0/8824, RunningAvgSamplesPerSec=6.339762923742962, CurrSamplesPerSec=5.7159978292298215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:05:09,122] [INFO] [timer.py:197:stop] 0/8826, RunningAvgSamplesPerSec=6.339765011045626, CurrSamplesPerSec=5.703528607066971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:05:20,396] [INFO] [timer.py:197:stop] 0/8828, RunningAvgSamplesPerSec=6.33976912955601, CurrSamplesPerSec=5.723405256061477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:05:31,663] [INFO] [timer.py:197:stop] 0/8830, RunningAvgSamplesPerSec=6.339773451329037, CurrSamplesPerSec=5.722831769221091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:05:42,916] [INFO] [timer.py:197:stop] 0/8832, RunningAvgSamplesPerSec=6.339779694712146, CurrSamplesPerSec=5.734509587094221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:05:54,209] [INFO] [timer.py:197:stop] 0/8834, RunningAvgSamplesPerSec=6.339780787868824, CurrSamplesPerSec=5.692312533690265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:06:05,497] [INFO] [timer.py:197:stop] 0/8836, RunningAvgSamplesPerSec=6.339782808639936, CurrSamplesPerSec=5.699461888141984, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:06:16,791] [INFO] [timer.py:197:stop] 0/8838, RunningAvgSamplesPerSec=6.339784492164005, CurrSamplesPerSec=5.695571353091131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:06:28,063] [INFO] [logging.py:68:log_dist] [Rank 0] step=4420, skipped=5, lr=[1.3022222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:06:28,064] [INFO] [timer.py:197:stop] 0/8840, RunningAvgSamplesPerSec=6.339785659528612, CurrSamplesPerSec=5.691189925329324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:06:39,357] [INFO] [timer.py:197:stop] 0/8842, RunningAvgSamplesPerSec=6.339784956161142, CurrSamplesPerSec=5.699604927715029, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:06:50,654] [INFO] [timer.py:197:stop] 0/8844, RunningAvgSamplesPerSec=6.339786231913779, CurrSamplesPerSec=5.713109044674698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:07:01,975] [INFO] [timer.py:197:stop] 0/8846, RunningAvgSamplesPerSec=6.339784180655913, CurrSamplesPerSec=5.687275486635333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:07:13,286] [INFO] [timer.py:197:stop] 0/8848, RunningAvgSamplesPerSec=6.3397832613420135, CurrSamplesPerSec=5.710735581744126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:07:24,562] [INFO] [timer.py:197:stop] 0/8850, RunningAvgSamplesPerSec=6.339788823688189, CurrSamplesPerSec=5.731127024156072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.2911111111111112e-06, 'epoch': 18.75} +[2022-12-17 12:07:35,838] [INFO] [timer.py:197:stop] 0/8852, RunningAvgSamplesPerSec=6.339792531338122, CurrSamplesPerSec=5.726484770543781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:07:47,127] [INFO] [timer.py:197:stop] 0/8854, RunningAvgSamplesPerSec=6.339794181176809, CurrSamplesPerSec=5.726559779030069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:07:58,386] [INFO] [timer.py:197:stop] 0/8856, RunningAvgSamplesPerSec=6.339795549648679, CurrSamplesPerSec=5.70901481544623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:08:09,671] [INFO] [timer.py:197:stop] 0/8858, RunningAvgSamplesPerSec=6.339797797999399, CurrSamplesPerSec=5.713039008517689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:08:20,970] [INFO] [logging.py:68:log_dist] [Rank 0] step=4430, skipped=5, lr=[1.28e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:08:20,972] [INFO] [timer.py:197:stop] 0/8860, RunningAvgSamplesPerSec=6.33979745410494, CurrSamplesPerSec=5.698277670452691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:08:32,265] [INFO] [timer.py:197:stop] 0/8862, RunningAvgSamplesPerSec=6.339798306758194, CurrSamplesPerSec=5.715546058725313, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:08:43,513] [INFO] [timer.py:197:stop] 0/8864, RunningAvgSamplesPerSec=6.339806136942207, CurrSamplesPerSec=5.735413814205769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:08:54,778] [INFO] [timer.py:197:stop] 0/8866, RunningAvgSamplesPerSec=6.339811109255454, CurrSamplesPerSec=5.736914872436329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:09:06,063] [INFO] [timer.py:197:stop] 0/8868, RunningAvgSamplesPerSec=6.33981305741132, CurrSamplesPerSec=5.706303629299453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:09:17,358] [INFO] [timer.py:197:stop] 0/8870, RunningAvgSamplesPerSec=6.339813871437535, CurrSamplesPerSec=5.696534905788065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:09:28,799] [INFO] [timer.py:197:stop] 0/8872, RunningAvgSamplesPerSec=6.3398134435915, CurrSamplesPerSec=5.698347829014574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:09:40,104] [INFO] [timer.py:197:stop] 0/8874, RunningAvgSamplesPerSec=6.339812533732992, CurrSamplesPerSec=5.698448957153876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:09:51,406] [INFO] [timer.py:197:stop] 0/8876, RunningAvgSamplesPerSec=6.33981209534651, CurrSamplesPerSec=5.700115669034691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:02,697] [INFO] [timer.py:197:stop] 0/8878, RunningAvgSamplesPerSec=6.3398137092677285, CurrSamplesPerSec=5.719678669814262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:13,951] [INFO] [logging.py:68:log_dist] [Rank 0] step=4440, skipped=5, lr=[1.2577777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:10:13,953] [INFO] [timer.py:197:stop] 0/8880, RunningAvgSamplesPerSec=6.339817566446612, CurrSamplesPerSec=5.738804139876291, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:25,255] [INFO] [timer.py:197:stop] 0/8882, RunningAvgSamplesPerSec=6.339816636345211, CurrSamplesPerSec=5.694697765622774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:36,527] [INFO] [timer.py:197:stop] 0/8884, RunningAvgSamplesPerSec=6.33982069811268, CurrSamplesPerSec=5.7302190106115445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:47,765] [INFO] [timer.py:197:stop] 0/8886, RunningAvgSamplesPerSec=6.339829384718083, CurrSamplesPerSec=5.74620588582076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:10:59,051] [INFO] [timer.py:197:stop] 0/8888, RunningAvgSamplesPerSec=6.339830894803963, CurrSamplesPerSec=5.724798697750464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:11:10,348] [INFO] [timer.py:197:stop] 0/8890, RunningAvgSamplesPerSec=6.339831816636154, CurrSamplesPerSec=5.715971538902973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:11:21,641] [INFO] [timer.py:197:stop] 0/8892, RunningAvgSamplesPerSec=6.339830535624382, CurrSamplesPerSec=5.6835113091805844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:11:32,945] [INFO] [timer.py:197:stop] 0/8894, RunningAvgSamplesPerSec=6.339829300115783, CurrSamplesPerSec=5.701985617770172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:11:44,214] [INFO] [timer.py:197:stop] 0/8896, RunningAvgSamplesPerSec=6.339830957348836, CurrSamplesPerSec=5.710873112905728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:11:55,472] [INFO] [timer.py:197:stop] 0/8898, RunningAvgSamplesPerSec=6.339832635853104, CurrSamplesPerSec=5.713768636047296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:12:06,767] [INFO] [logging.py:68:log_dist] [Rank 0] step=4450, skipped=5, lr=[1.2355555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:12:06,769] [INFO] [timer.py:197:stop] 0/8900, RunningAvgSamplesPerSec=6.339833089257597, CurrSamplesPerSec=5.69870590651442, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.2355555555555557e-06, 'epoch': 18.86} +[2022-12-17 12:12:18,030] [INFO] [timer.py:197:stop] 0/8902, RunningAvgSamplesPerSec=6.339837649166962, CurrSamplesPerSec=5.7158598077118485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:12:29,300] [INFO] [timer.py:197:stop] 0/8904, RunningAvgSamplesPerSec=6.339839674023829, CurrSamplesPerSec=5.711668785350645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:12:40,622] [INFO] [timer.py:197:stop] 0/8906, RunningAvgSamplesPerSec=6.339835937166133, CurrSamplesPerSec=5.700604226707294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:12:51,901] [INFO] [timer.py:197:stop] 0/8908, RunningAvgSamplesPerSec=6.339838380970233, CurrSamplesPerSec=5.713043142548713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:03,166] [INFO] [timer.py:197:stop] 0/8910, RunningAvgSamplesPerSec=6.339842998569798, CurrSamplesPerSec=5.715696965497805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:14,456] [INFO] [timer.py:197:stop] 0/8912, RunningAvgSamplesPerSec=6.3398408748108315, CurrSamplesPerSec=5.711529271465982, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:25,746] [INFO] [timer.py:197:stop] 0/8914, RunningAvgSamplesPerSec=6.339839533197108, CurrSamplesPerSec=5.693573008732244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:37,015] [INFO] [timer.py:197:stop] 0/8916, RunningAvgSamplesPerSec=6.339840725581212, CurrSamplesPerSec=5.708154092828949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:48,318] [INFO] [timer.py:197:stop] 0/8918, RunningAvgSamplesPerSec=6.339840744759129, CurrSamplesPerSec=5.702353358837302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:13:59,620] [INFO] [logging.py:68:log_dist] [Rank 0] step=4460, skipped=5, lr=[1.2133333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:13:59,622] [INFO] [timer.py:197:stop] 0/8920, RunningAvgSamplesPerSec=6.339842393762703, CurrSamplesPerSec=5.709589665323134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:14:10,959] [INFO] [timer.py:197:stop] 0/8922, RunningAvgSamplesPerSec=6.339837047896928, CurrSamplesPerSec=5.680314111197988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:14:22,277] [INFO] [timer.py:197:stop] 0/8924, RunningAvgSamplesPerSec=6.33983454166607, CurrSamplesPerSec=5.685408188041345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:14:33,580] [INFO] [timer.py:197:stop] 0/8926, RunningAvgSamplesPerSec=6.339831781894776, CurrSamplesPerSec=5.699041282062267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:14:44,921] [INFO] [timer.py:197:stop] 0/8928, RunningAvgSamplesPerSec=6.339826765134753, CurrSamplesPerSec=5.681271305657147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:14:56,483] [INFO] [timer.py:197:stop] 0/8930, RunningAvgSamplesPerSec=6.3398219933408395, CurrSamplesPerSec=5.671220649710971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:15:07,811] [INFO] [timer.py:197:stop] 0/8932, RunningAvgSamplesPerSec=6.339817462524407, CurrSamplesPerSec=5.685406502220849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:15:19,085] [INFO] [timer.py:197:stop] 0/8934, RunningAvgSamplesPerSec=6.339820507025385, CurrSamplesPerSec=5.731930309104549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:15:30,372] [INFO] [timer.py:197:stop] 0/8936, RunningAvgSamplesPerSec=6.3398200356741485, CurrSamplesPerSec=5.698484522209096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:15:41,621] [INFO] [timer.py:197:stop] 0/8938, RunningAvgSamplesPerSec=6.3398251188229, CurrSamplesPerSec=5.7179093943535735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:15:52,917] [INFO] [logging.py:68:log_dist] [Rank 0] step=4470, skipped=5, lr=[1.1911111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:15:52,918] [INFO] [timer.py:197:stop] 0/8940, RunningAvgSamplesPerSec=6.339826107081712, CurrSamplesPerSec=5.704901231534841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:16:04,199] [INFO] [timer.py:197:stop] 0/8942, RunningAvgSamplesPerSec=6.33982693796467, CurrSamplesPerSec=5.703309998460055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:16:15,513] [INFO] [timer.py:197:stop] 0/8944, RunningAvgSamplesPerSec=6.339823082241985, CurrSamplesPerSec=5.684895503862783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:16:26,828] [INFO] [timer.py:197:stop] 0/8946, RunningAvgSamplesPerSec=6.33982192948109, CurrSamplesPerSec=5.693825653821079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:16:38,122] [INFO] [timer.py:197:stop] 0/8948, RunningAvgSamplesPerSec=6.339821516488836, CurrSamplesPerSec=5.713446117698301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:16:49,415] [INFO] [timer.py:197:stop] 0/8950, RunningAvgSamplesPerSec=6.339822438115419, CurrSamplesPerSec=5.707341439891114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.1800000000000001e-06, 'epoch': 18.96} +[2022-12-17 12:17:00,729] [INFO] [timer.py:197:stop] 0/8952, RunningAvgSamplesPerSec=6.339821235613619, CurrSamplesPerSec=5.711667083920755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:17:12,038] [INFO] [timer.py:197:stop] 0/8954, RunningAvgSamplesPerSec=6.339820683290209, CurrSamplesPerSec=5.704606868918564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:17:23,332] [INFO] [timer.py:197:stop] 0/8956, RunningAvgSamplesPerSec=6.33982214033586, CurrSamplesPerSec=5.708217211761304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:17:34,674] [INFO] [timer.py:197:stop] 0/8958, RunningAvgSamplesPerSec=6.339817165687331, CurrSamplesPerSec=5.6624811326537765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:17:45,974] [INFO] [logging.py:68:log_dist] [Rank 0] step=4480, skipped=5, lr=[1.168888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:17:45,975] [INFO] [timer.py:197:stop] 0/8960, RunningAvgSamplesPerSec=6.339816877634681, CurrSamplesPerSec=5.697127558306983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:17:57,229] [INFO] [timer.py:197:stop] 0/8962, RunningAvgSamplesPerSec=6.339821222503794, CurrSamplesPerSec=5.732452981301859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:18:08,515] [INFO] [timer.py:197:stop] 0/8964, RunningAvgSamplesPerSec=6.339822832371627, CurrSamplesPerSec=5.708728769375107, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:18:19,834] [INFO] [timer.py:197:stop] 0/8966, RunningAvgSamplesPerSec=6.339819980968422, CurrSamplesPerSec=5.691758294557167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:18:28,319] [INFO] [timer.py:197:stop] 0/8968, RunningAvgSamplesPerSec=6.340167276359993, CurrSamplesPerSec=10.235759569209229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:18:39,613] [INFO] [timer.py:197:stop] 0/8970, RunningAvgSamplesPerSec=6.340168201205643, CurrSamplesPerSec=5.688185609714782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:18:50,885] [INFO] [timer.py:197:stop] 0/8972, RunningAvgSamplesPerSec=6.340167215906907, CurrSamplesPerSec=5.68072835094578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:02,136] [INFO] [timer.py:197:stop] 0/8974, RunningAvgSamplesPerSec=6.340170526445283, CurrSamplesPerSec=5.724859987652656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:13,421] [INFO] [timer.py:197:stop] 0/8976, RunningAvgSamplesPerSec=6.340171770538228, CurrSamplesPerSec=5.715203870632199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:24,671] [INFO] [timer.py:197:stop] 0/8978, RunningAvgSamplesPerSec=6.340178150339853, CurrSamplesPerSec=5.705607680246926, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:35,941] [INFO] [logging.py:68:log_dist] [Rank 0] step=4490, skipped=5, lr=[1.1466666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:19:35,943] [INFO] [timer.py:197:stop] 0/8980, RunningAvgSamplesPerSec=6.340180370770885, CurrSamplesPerSec=5.697885298209449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:47,252] [INFO] [timer.py:197:stop] 0/8982, RunningAvgSamplesPerSec=6.3401789882383754, CurrSamplesPerSec=5.693759954279462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:19:58,646] [INFO] [timer.py:197:stop] 0/8984, RunningAvgSamplesPerSec=6.340187216793461, CurrSamplesPerSec=5.750422141770742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:20:09,921] [INFO] [timer.py:197:stop] 0/8986, RunningAvgSamplesPerSec=6.340188161061402, CurrSamplesPerSec=5.6917780869534536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:20:21,256] [INFO] [timer.py:197:stop] 0/8988, RunningAvgSamplesPerSec=6.340183001426609, CurrSamplesPerSec=5.67539185899419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:20:32,558] [INFO] [timer.py:197:stop] 0/8990, RunningAvgSamplesPerSec=6.3401830253250155, CurrSamplesPerSec=5.701508933152393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:20:43,826] [INFO] [timer.py:197:stop] 0/8992, RunningAvgSamplesPerSec=6.340185600175935, CurrSamplesPerSec=5.726181824666652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:20:55,092] [INFO] [timer.py:197:stop] 0/8994, RunningAvgSamplesPerSec=6.340188325164848, CurrSamplesPerSec=5.745743916480419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:21:06,365] [INFO] [timer.py:197:stop] 0/8996, RunningAvgSamplesPerSec=6.340190745416319, CurrSamplesPerSec=5.722420881672769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:21:17,714] [INFO] [timer.py:197:stop] 0/8998, RunningAvgSamplesPerSec=6.340184455569013, CurrSamplesPerSec=5.684199952042192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:21:29,042] [INFO] [logging.py:68:log_dist] [Rank 0] step=4500, skipped=5, lr=[1.1244444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:21:29,043] [INFO] [timer.py:197:stop] 0/9000, RunningAvgSamplesPerSec=6.3401812963789865, CurrSamplesPerSec=5.70212587704807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.1244444444444446e-06, 'epoch': 19.07} +[2022-12-17 12:21:40,383] [INFO] [timer.py:197:stop] 0/9002, RunningAvgSamplesPerSec=6.340174691416419, CurrSamplesPerSec=5.678948005097177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:21:51,710] [INFO] [timer.py:197:stop] 0/9004, RunningAvgSamplesPerSec=6.340169051770326, CurrSamplesPerSec=5.696995766485775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:03,013] [INFO] [timer.py:197:stop] 0/9006, RunningAvgSamplesPerSec=6.340169439424454, CurrSamplesPerSec=5.704673788888134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:14,286] [INFO] [timer.py:197:stop] 0/9008, RunningAvgSamplesPerSec=6.340170547414348, CurrSamplesPerSec=5.723376701019164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:25,553] [INFO] [timer.py:197:stop] 0/9010, RunningAvgSamplesPerSec=6.340172793652232, CurrSamplesPerSec=5.716274622624514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:36,828] [INFO] [timer.py:197:stop] 0/9012, RunningAvgSamplesPerSec=6.340174630700916, CurrSamplesPerSec=5.695876144722008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:48,136] [INFO] [timer.py:197:stop] 0/9014, RunningAvgSamplesPerSec=6.340173785718381, CurrSamplesPerSec=5.690204055901287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:22:59,440] [INFO] [timer.py:197:stop] 0/9016, RunningAvgSamplesPerSec=6.34017389648435, CurrSamplesPerSec=5.714023319927587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:23:10,759] [INFO] [timer.py:197:stop] 0/9018, RunningAvgSamplesPerSec=6.340171209682552, CurrSamplesPerSec=5.690274498282226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:23:22,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=4510, skipped=5, lr=[1.1022222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:23:22,025] [INFO] [timer.py:197:stop] 0/9020, RunningAvgSamplesPerSec=6.340176370473775, CurrSamplesPerSec=5.725990056949864, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:23:33,299] [INFO] [timer.py:197:stop] 0/9022, RunningAvgSamplesPerSec=6.340177698072755, CurrSamplesPerSec=5.703774137599782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:23:44,547] [INFO] [timer.py:197:stop] 0/9024, RunningAvgSamplesPerSec=6.340182563683797, CurrSamplesPerSec=5.724629729995669, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:23:55,848] [INFO] [timer.py:197:stop] 0/9026, RunningAvgSamplesPerSec=6.3401803594391275, CurrSamplesPerSec=5.691665610045184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:24:07,132] [INFO] [timer.py:197:stop] 0/9028, RunningAvgSamplesPerSec=6.340180826764234, CurrSamplesPerSec=5.713259336390843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:24:18,389] [INFO] [timer.py:197:stop] 0/9030, RunningAvgSamplesPerSec=6.340182489916034, CurrSamplesPerSec=5.702290127253462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:24:29,654] [INFO] [timer.py:197:stop] 0/9032, RunningAvgSamplesPerSec=6.3401870134176725, CurrSamplesPerSec=5.718191488867679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:24:40,926] [INFO] [timer.py:197:stop] 0/9034, RunningAvgSamplesPerSec=6.340189064722712, CurrSamplesPerSec=5.72053580105053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:24:52,216] [INFO] [timer.py:197:stop] 0/9036, RunningAvgSamplesPerSec=6.340191777273399, CurrSamplesPerSec=5.707461332910816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:25:03,638] [INFO] [timer.py:197:stop] 0/9038, RunningAvgSamplesPerSec=6.340195482911859, CurrSamplesPerSec=5.717401791781072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:25:14,941] [INFO] [logging.py:68:log_dist] [Rank 0] step=4520, skipped=5, lr=[1.08e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:25:14,943] [INFO] [timer.py:197:stop] 0/9040, RunningAvgSamplesPerSec=6.340194721533849, CurrSamplesPerSec=5.693289473929791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:25:26,277] [INFO] [timer.py:197:stop] 0/9042, RunningAvgSamplesPerSec=6.340189349571833, CurrSamplesPerSec=5.685959264084955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:25:37,626] [INFO] [timer.py:197:stop] 0/9044, RunningAvgSamplesPerSec=6.340182920822173, CurrSamplesPerSec=5.652958172497661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:25:48,933] [INFO] [timer.py:197:stop] 0/9046, RunningAvgSamplesPerSec=6.340182202502154, CurrSamplesPerSec=5.681926211518131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:26:00,260] [INFO] [timer.py:197:stop] 0/9048, RunningAvgSamplesPerSec=6.340178368778449, CurrSamplesPerSec=5.672029999469638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:26:11,568] [INFO] [timer.py:197:stop] 0/9050, RunningAvgSamplesPerSec=6.340177804213071, CurrSamplesPerSec=5.712056980924633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.068888888888889e-06, 'epoch': 19.17} +[2022-12-17 12:26:22,889] [INFO] [timer.py:197:stop] 0/9052, RunningAvgSamplesPerSec=6.340176452103689, CurrSamplesPerSec=5.711228634922189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:26:34,174] [INFO] [timer.py:197:stop] 0/9054, RunningAvgSamplesPerSec=6.340178437975144, CurrSamplesPerSec=5.725612666283189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:26:45,490] [INFO] [timer.py:197:stop] 0/9056, RunningAvgSamplesPerSec=6.340176071690772, CurrSamplesPerSec=5.682095554381373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:26:56,781] [INFO] [timer.py:197:stop] 0/9058, RunningAvgSamplesPerSec=6.340175540872804, CurrSamplesPerSec=5.678521772244945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:27:08,081] [INFO] [logging.py:68:log_dist] [Rank 0] step=4530, skipped=5, lr=[1.0577777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:27:08,083] [INFO] [timer.py:197:stop] 0/9060, RunningAvgSamplesPerSec=6.340176084418741, CurrSamplesPerSec=5.714119653272828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:27:19,412] [INFO] [timer.py:197:stop] 0/9062, RunningAvgSamplesPerSec=6.340172492979608, CurrSamplesPerSec=5.6913443753728625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:27:30,750] [INFO] [timer.py:197:stop] 0/9064, RunningAvgSamplesPerSec=6.340167495179233, CurrSamplesPerSec=5.682358969947255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:27:42,051] [INFO] [timer.py:197:stop] 0/9066, RunningAvgSamplesPerSec=6.340168341948707, CurrSamplesPerSec=5.705136452494435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:27:53,438] [INFO] [timer.py:197:stop] 0/9068, RunningAvgSamplesPerSec=6.340156127963793, CurrSamplesPerSec=5.644270297923019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:28:04,764] [INFO] [timer.py:197:stop] 0/9070, RunningAvgSamplesPerSec=6.340150850458402, CurrSamplesPerSec=5.682519196291389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:28:16,074] [INFO] [timer.py:197:stop] 0/9072, RunningAvgSamplesPerSec=6.340147652003948, CurrSamplesPerSec=5.682816817849004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:28:27,427] [INFO] [timer.py:197:stop] 0/9074, RunningAvgSamplesPerSec=6.340137880887122, CurrSamplesPerSec=5.662173932761074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:28:38,748] [INFO] [timer.py:197:stop] 0/9076, RunningAvgSamplesPerSec=6.340134725198894, CurrSamplesPerSec=5.695298977705807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:28:50,058] [INFO] [timer.py:197:stop] 0/9078, RunningAvgSamplesPerSec=6.3401333643651965, CurrSamplesPerSec=5.709362820154792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:01,317] [INFO] [logging.py:68:log_dist] [Rank 0] step=4540, skipped=5, lr=[1.0355555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:29:01,319] [INFO] [timer.py:197:stop] 0/9080, RunningAvgSamplesPerSec=6.340137162093382, CurrSamplesPerSec=5.72625462647566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:12,608] [INFO] [timer.py:197:stop] 0/9082, RunningAvgSamplesPerSec=6.3401393811599265, CurrSamplesPerSec=5.711931789987668, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:23,896] [INFO] [timer.py:197:stop] 0/9084, RunningAvgSamplesPerSec=6.34014109355379, CurrSamplesPerSec=5.710250387784129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:35,172] [INFO] [timer.py:197:stop] 0/9086, RunningAvgSamplesPerSec=6.340143988565522, CurrSamplesPerSec=5.714197257496509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:46,509] [INFO] [timer.py:197:stop] 0/9088, RunningAvgSamplesPerSec=6.340138799723675, CurrSamplesPerSec=5.687047278459022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:29:57,807] [INFO] [timer.py:197:stop] 0/9090, RunningAvgSamplesPerSec=6.340138699289519, CurrSamplesPerSec=5.699062819070333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:30:09,279] [INFO] [timer.py:197:stop] 0/9092, RunningAvgSamplesPerSec=6.340141479739859, CurrSamplesPerSec=5.705302088787175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:30:20,576] [INFO] [timer.py:197:stop] 0/9094, RunningAvgSamplesPerSec=6.340141333697836, CurrSamplesPerSec=5.702480553052258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:30:31,849] [INFO] [timer.py:197:stop] 0/9096, RunningAvgSamplesPerSec=6.340142391459165, CurrSamplesPerSec=5.707457206953979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:30:43,139] [INFO] [timer.py:197:stop] 0/9098, RunningAvgSamplesPerSec=6.340144053045042, CurrSamplesPerSec=5.693934834353835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:30:54,423] [INFO] [logging.py:68:log_dist] [Rank 0] step=4550, skipped=5, lr=[1.0133333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-17 12:30:54,424] [INFO] [timer.py:197:stop] 0/9100, RunningAvgSamplesPerSec=6.34014581087975, CurrSamplesPerSec=5.70825823981589, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.0133333333333333e-06, 'epoch': 19.28} +[2022-12-17 12:31:05,693] [INFO] [timer.py:197:stop] 0/9102, RunningAvgSamplesPerSec=6.340146300653431, CurrSamplesPerSec=5.721616113216765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:31:16,984] [INFO] [timer.py:197:stop] 0/9104, RunningAvgSamplesPerSec=6.340146021568912, CurrSamplesPerSec=5.704086595467083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:31:28,249] [INFO] [timer.py:197:stop] 0/9106, RunningAvgSamplesPerSec=6.340149200742008, CurrSamplesPerSec=5.708663211070656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:31:39,554] [INFO] [timer.py:197:stop] 0/9108, RunningAvgSamplesPerSec=6.340145155623433, CurrSamplesPerSec=5.688826920042316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:31:50,834] [INFO] [timer.py:197:stop] 0/9110, RunningAvgSamplesPerSec=6.340147094179502, CurrSamplesPerSec=5.716423376674596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:02,157] [INFO] [timer.py:197:stop] 0/9112, RunningAvgSamplesPerSec=6.340143520926042, CurrSamplesPerSec=5.692019227323512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:13,411] [INFO] [timer.py:197:stop] 0/9114, RunningAvgSamplesPerSec=6.340147602549759, CurrSamplesPerSec=5.720771581562675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:24,684] [INFO] [timer.py:197:stop] 0/9116, RunningAvgSamplesPerSec=6.340150929871223, CurrSamplesPerSec=5.727981891384154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:36,009] [INFO] [timer.py:197:stop] 0/9118, RunningAvgSamplesPerSec=6.340147143194762, CurrSamplesPerSec=5.6731167655569115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:47,314] [INFO] [logging.py:68:log_dist] [Rank 0] step=4560, skipped=5, lr=[9.911111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:32:47,316] [INFO] [timer.py:197:stop] 0/9120, RunningAvgSamplesPerSec=6.3401461025404995, CurrSamplesPerSec=5.6980246301305115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:32:58,605] [INFO] [timer.py:197:stop] 0/9122, RunningAvgSamplesPerSec=6.340147576847911, CurrSamplesPerSec=5.700921422456217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:33:09,905] [INFO] [timer.py:197:stop] 0/9124, RunningAvgSamplesPerSec=6.340147927191419, CurrSamplesPerSec=5.72497597811113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:33:21,197] [INFO] [timer.py:197:stop] 0/9126, RunningAvgSamplesPerSec=6.340147353261547, CurrSamplesPerSec=5.711897272286692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:33:32,479] [INFO] [timer.py:197:stop] 0/9128, RunningAvgSamplesPerSec=6.340149692615916, CurrSamplesPerSec=5.710304564092, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:33:43,740] [INFO] [timer.py:197:stop] 0/9130, RunningAvgSamplesPerSec=6.340155213278866, CurrSamplesPerSec=5.734318486158727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:33:55,041] [INFO] [timer.py:197:stop] 0/9132, RunningAvgSamplesPerSec=6.340155343894294, CurrSamplesPerSec=5.669024083496483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:34:06,316] [INFO] [timer.py:197:stop] 0/9134, RunningAvgSamplesPerSec=6.340156480313296, CurrSamplesPerSec=5.702799896105481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:34:17,658] [INFO] [timer.py:197:stop] 0/9136, RunningAvgSamplesPerSec=6.340154367953798, CurrSamplesPerSec=5.681481253524794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:34:29,045] [INFO] [timer.py:197:stop] 0/9138, RunningAvgSamplesPerSec=6.340143965937071, CurrSamplesPerSec=5.610906831033096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:34:40,324] [INFO] [logging.py:68:log_dist] [Rank 0] step=4570, skipped=5, lr=[9.68888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:34:40,325] [INFO] [timer.py:197:stop] 0/9140, RunningAvgSamplesPerSec=6.340147034487641, CurrSamplesPerSec=5.708202402963382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:34:51,575] [INFO] [timer.py:197:stop] 0/9142, RunningAvgSamplesPerSec=6.34015044490484, CurrSamplesPerSec=5.726932162951941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:35:02,854] [INFO] [timer.py:197:stop] 0/9144, RunningAvgSamplesPerSec=6.34015603016963, CurrSamplesPerSec=5.721095659088879, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:35:14,179] [INFO] [timer.py:197:stop] 0/9146, RunningAvgSamplesPerSec=6.340152719323789, CurrSamplesPerSec=5.68065429785044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:35:25,457] [INFO] [timer.py:197:stop] 0/9148, RunningAvgSamplesPerSec=6.340155277784047, CurrSamplesPerSec=5.706863374094032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:35:36,737] [INFO] [timer.py:197:stop] 0/9150, RunningAvgSamplesPerSec=6.340158367560466, CurrSamplesPerSec=5.708289314711287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 9.57777777777778e-07, 'epoch': 19.39} +[2022-12-17 12:35:48,008] [INFO] [timer.py:197:stop] 0/9152, RunningAvgSamplesPerSec=6.340162253630085, CurrSamplesPerSec=5.718088684361913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:35:59,294] [INFO] [timer.py:197:stop] 0/9154, RunningAvgSamplesPerSec=6.340162321852701, CurrSamplesPerSec=5.711533403312362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:36:10,578] [INFO] [timer.py:197:stop] 0/9156, RunningAvgSamplesPerSec=6.340164153551402, CurrSamplesPerSec=5.707124722983757, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:36:21,870] [INFO] [timer.py:197:stop] 0/9158, RunningAvgSamplesPerSec=6.340164749961444, CurrSamplesPerSec=5.717264189529792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:36:33,149] [INFO] [logging.py:68:log_dist] [Rank 0] step=4580, skipped=5, lr=[9.466666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:36:33,151] [INFO] [timer.py:197:stop] 0/9160, RunningAvgSamplesPerSec=6.340164648943723, CurrSamplesPerSec=5.696304019888085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:36:44,435] [INFO] [timer.py:197:stop] 0/9162, RunningAvgSamplesPerSec=6.340166541679684, CurrSamplesPerSec=5.689125685010388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:36:55,813] [INFO] [timer.py:197:stop] 0/9164, RunningAvgSamplesPerSec=6.340167713844468, CurrSamplesPerSec=5.702747315920129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:37:07,100] [INFO] [timer.py:197:stop] 0/9166, RunningAvgSamplesPerSec=6.34016700060534, CurrSamplesPerSec=5.7019269967660495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:37:18,408] [INFO] [timer.py:197:stop] 0/9168, RunningAvgSamplesPerSec=6.340164707006909, CurrSamplesPerSec=5.666900997116174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:37:29,697] [INFO] [timer.py:197:stop] 0/9170, RunningAvgSamplesPerSec=6.340164121910339, CurrSamplesPerSec=5.690992772376195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:37:40,949] [INFO] [timer.py:197:stop] 0/9172, RunningAvgSamplesPerSec=6.340168160252227, CurrSamplesPerSec=5.71007766211458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:37:52,256] [INFO] [timer.py:197:stop] 0/9174, RunningAvgSamplesPerSec=6.3401676752621166, CurrSamplesPerSec=5.684985319257441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:38:03,683] [INFO] [timer.py:197:stop] 0/9176, RunningAvgSamplesPerSec=6.340150339663452, CurrSamplesPerSec=5.5800397327838285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:38:14,979] [INFO] [timer.py:197:stop] 0/9178, RunningAvgSamplesPerSec=6.340148930847332, CurrSamplesPerSec=5.673131392868632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:38:26,248] [INFO] [logging.py:68:log_dist] [Rank 0] step=4590, skipped=5, lr=[9.244444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:38:26,250] [INFO] [timer.py:197:stop] 0/9180, RunningAvgSamplesPerSec=6.340150298991688, CurrSamplesPerSec=5.698743410477561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:38:37,552] [INFO] [timer.py:197:stop] 0/9182, RunningAvgSamplesPerSec=6.3401497455325915, CurrSamplesPerSec=5.7135380536078735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:38:49,093] [INFO] [timer.py:197:stop] 0/9184, RunningAvgSamplesPerSec=6.3401491895422035, CurrSamplesPerSec=5.686618865268576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:00,363] [INFO] [timer.py:197:stop] 0/9186, RunningAvgSamplesPerSec=6.340153255419131, CurrSamplesPerSec=5.7045084315268895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:11,652] [INFO] [timer.py:197:stop] 0/9188, RunningAvgSamplesPerSec=6.340155155704845, CurrSamplesPerSec=5.7096839060888716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:22,930] [INFO] [timer.py:197:stop] 0/9190, RunningAvgSamplesPerSec=6.340158223776878, CurrSamplesPerSec=5.719141996572548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:34,228] [INFO] [timer.py:197:stop] 0/9192, RunningAvgSamplesPerSec=6.340159418782378, CurrSamplesPerSec=5.713817770997784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:45,529] [INFO] [timer.py:197:stop] 0/9194, RunningAvgSamplesPerSec=6.340158826753314, CurrSamplesPerSec=5.699113879369092, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:39:56,806] [INFO] [timer.py:197:stop] 0/9196, RunningAvgSamplesPerSec=6.340156998854702, CurrSamplesPerSec=5.70595648256646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:40:08,101] [INFO] [timer.py:197:stop] 0/9198, RunningAvgSamplesPerSec=6.340155925570966, CurrSamplesPerSec=5.7080890332365515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:40:19,421] [INFO] [logging.py:68:log_dist] [Rank 0] step=4600, skipped=5, lr=[9.022222222222222e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:40:19,423] [INFO] [timer.py:197:stop] 0/9200, RunningAvgSamplesPerSec=6.34015258430215, CurrSamplesPerSec=5.692694480783714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 9.022222222222222e-07, 'epoch': 19.49} +[2022-12-17 12:40:30,728] [INFO] [timer.py:197:stop] 0/9202, RunningAvgSamplesPerSec=6.340152615043308, CurrSamplesPerSec=5.710397613692423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:40:42,007] [INFO] [timer.py:197:stop] 0/9204, RunningAvgSamplesPerSec=6.340153859145852, CurrSamplesPerSec=5.711524410477892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:40:53,276] [INFO] [timer.py:197:stop] 0/9206, RunningAvgSamplesPerSec=6.340159289461381, CurrSamplesPerSec=5.735856965083715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:41:04,547] [INFO] [timer.py:197:stop] 0/9208, RunningAvgSamplesPerSec=6.3401623345042974, CurrSamplesPerSec=5.711224746540432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:41:15,941] [INFO] [timer.py:197:stop] 0/9210, RunningAvgSamplesPerSec=6.340157326838278, CurrSamplesPerSec=5.669542530462479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:41:27,264] [INFO] [timer.py:197:stop] 0/9212, RunningAvgSamplesPerSec=6.340156287015333, CurrSamplesPerSec=5.69148555969936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:41:38,552] [INFO] [timer.py:197:stop] 0/9214, RunningAvgSamplesPerSec=6.340158164960546, CurrSamplesPerSec=5.730159073735827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:41:49,875] [INFO] [timer.py:197:stop] 0/9216, RunningAvgSamplesPerSec=6.340159228571226, CurrSamplesPerSec=5.7013219627797875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:01,219] [INFO] [timer.py:197:stop] 0/9218, RunningAvgSamplesPerSec=6.340153976221815, CurrSamplesPerSec=5.692182654384938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:12,499] [INFO] [logging.py:68:log_dist] [Rank 0] step=4610, skipped=5, lr=[8.8e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:42:12,500] [INFO] [timer.py:197:stop] 0/9220, RunningAvgSamplesPerSec=6.340153403841933, CurrSamplesPerSec=5.710089808444558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:23,829] [INFO] [timer.py:197:stop] 0/9222, RunningAvgSamplesPerSec=6.340147555697972, CurrSamplesPerSec=5.681393713237994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:35,135] [INFO] [timer.py:197:stop] 0/9224, RunningAvgSamplesPerSec=6.340147942549898, CurrSamplesPerSec=5.702918629122964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:46,412] [INFO] [timer.py:197:stop] 0/9226, RunningAvgSamplesPerSec=6.340149254212709, CurrSamplesPerSec=5.708072768570178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:42:57,721] [INFO] [timer.py:197:stop] 0/9228, RunningAvgSamplesPerSec=6.340148567886221, CurrSamplesPerSec=5.713274414636991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:43:08,988] [INFO] [timer.py:197:stop] 0/9230, RunningAvgSamplesPerSec=6.340153230507024, CurrSamplesPerSec=5.716390752374001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:43:20,259] [INFO] [timer.py:197:stop] 0/9232, RunningAvgSamplesPerSec=6.340157138416786, CurrSamplesPerSec=5.729755939087335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:43:31,756] [INFO] [timer.py:197:stop] 0/9234, RunningAvgSamplesPerSec=6.34015718668598, CurrSamplesPerSec=5.6950486183439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:43:43,022] [INFO] [timer.py:197:stop] 0/9236, RunningAvgSamplesPerSec=6.340160196742237, CurrSamplesPerSec=5.720007986497109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:43:54,300] [INFO] [timer.py:197:stop] 0/9238, RunningAvgSamplesPerSec=6.3401630097358055, CurrSamplesPerSec=5.715744429759032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:44:05,634] [INFO] [logging.py:68:log_dist] [Rank 0] step=4620, skipped=5, lr=[8.577777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:44:05,636] [INFO] [timer.py:197:stop] 0/9240, RunningAvgSamplesPerSec=6.340158126370072, CurrSamplesPerSec=5.682443893595855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:44:16,933] [INFO] [timer.py:197:stop] 0/9242, RunningAvgSamplesPerSec=6.340158936768335, CurrSamplesPerSec=5.700625775529919, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:44:28,218] [INFO] [timer.py:197:stop] 0/9244, RunningAvgSamplesPerSec=6.3401615269069636, CurrSamplesPerSec=5.719573618214001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:44:39,517] [INFO] [timer.py:197:stop] 0/9246, RunningAvgSamplesPerSec=6.340161629533877, CurrSamplesPerSec=5.72056944795954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:44:50,799] [INFO] [timer.py:197:stop] 0/9248, RunningAvgSamplesPerSec=6.340161049942304, CurrSamplesPerSec=5.71187539508658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:45:02,081] [INFO] [timer.py:197:stop] 0/9250, RunningAvgSamplesPerSec=6.340161406104777, CurrSamplesPerSec=5.69962477472045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 8.466666666666668e-07, 'epoch': 19.6} +[2022-12-17 12:45:13,396] [INFO] [timer.py:197:stop] 0/9252, RunningAvgSamplesPerSec=6.3401600092698365, CurrSamplesPerSec=5.68532630648655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:45:24,715] [INFO] [timer.py:197:stop] 0/9254, RunningAvgSamplesPerSec=6.340158751716345, CurrSamplesPerSec=5.694735700092881, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:45:36,020] [INFO] [timer.py:197:stop] 0/9256, RunningAvgSamplesPerSec=6.340158815250996, CurrSamplesPerSec=5.703624102116573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:45:47,319] [INFO] [timer.py:197:stop] 0/9258, RunningAvgSamplesPerSec=6.34015930739022, CurrSamplesPerSec=5.7091872339697884, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:45:58,600] [INFO] [logging.py:68:log_dist] [Rank 0] step=4630, skipped=5, lr=[8.355555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:45:58,601] [INFO] [timer.py:197:stop] 0/9260, RunningAvgSamplesPerSec=6.340159115245336, CurrSamplesPerSec=5.70457510672883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:46:09,916] [INFO] [timer.py:197:stop] 0/9262, RunningAvgSamplesPerSec=6.3401568534304005, CurrSamplesPerSec=5.684320319201461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:46:21,205] [INFO] [timer.py:197:stop] 0/9264, RunningAvgSamplesPerSec=6.340159812195485, CurrSamplesPerSec=5.72261851024271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:46:32,485] [INFO] [timer.py:197:stop] 0/9266, RunningAvgSamplesPerSec=6.340162767843043, CurrSamplesPerSec=5.7171287856386295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:46:43,801] [INFO] [timer.py:197:stop] 0/9268, RunningAvgSamplesPerSec=6.340160434655951, CurrSamplesPerSec=5.700246636884618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:46:55,126] [INFO] [timer.py:197:stop] 0/9270, RunningAvgSamplesPerSec=6.340157542329263, CurrSamplesPerSec=5.70487746802184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:47:06,461] [INFO] [timer.py:197:stop] 0/9272, RunningAvgSamplesPerSec=6.340155016075218, CurrSamplesPerSec=5.674236332603981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:47:17,758] [INFO] [timer.py:197:stop] 0/9274, RunningAvgSamplesPerSec=6.340155935623798, CurrSamplesPerSec=5.690579205791651, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:47:29,030] [INFO] [timer.py:197:stop] 0/9276, RunningAvgSamplesPerSec=6.340157051769977, CurrSamplesPerSec=5.704070838432285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:47:40,303] [INFO] [timer.py:197:stop] 0/9278, RunningAvgSamplesPerSec=6.340160005413446, CurrSamplesPerSec=5.721028597144834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:47:51,671] [INFO] [logging.py:68:log_dist] [Rank 0] step=4640, skipped=5, lr=[8.133333333333333e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:47:51,672] [INFO] [timer.py:197:stop] 0/9280, RunningAvgSamplesPerSec=6.340162249396748, CurrSamplesPerSec=5.72207738202556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:03,000] [INFO] [timer.py:197:stop] 0/9282, RunningAvgSamplesPerSec=6.3401583228653395, CurrSamplesPerSec=5.687393574085657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:14,284] [INFO] [timer.py:197:stop] 0/9284, RunningAvgSamplesPerSec=6.340160924269768, CurrSamplesPerSec=5.718414894341315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:25,575] [INFO] [timer.py:197:stop] 0/9286, RunningAvgSamplesPerSec=6.340162293247801, CurrSamplesPerSec=5.72458724545319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:36,883] [INFO] [timer.py:197:stop] 0/9288, RunningAvgSamplesPerSec=6.340161675242742, CurrSamplesPerSec=5.710935319988416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:48,180] [INFO] [timer.py:197:stop] 0/9290, RunningAvgSamplesPerSec=6.340161733304246, CurrSamplesPerSec=5.698499038685744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:48:59,439] [INFO] [timer.py:197:stop] 0/9292, RunningAvgSamplesPerSec=6.340164248884015, CurrSamplesPerSec=5.731903872013158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:49:10,738] [INFO] [timer.py:197:stop] 0/9294, RunningAvgSamplesPerSec=6.340163637335254, CurrSamplesPerSec=5.700694539154415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:49:22,054] [INFO] [timer.py:197:stop] 0/9296, RunningAvgSamplesPerSec=6.340161429995194, CurrSamplesPerSec=5.711878312036912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:49:33,345] [INFO] [timer.py:197:stop] 0/9298, RunningAvgSamplesPerSec=6.340161402231518, CurrSamplesPerSec=5.715301217270755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:49:44,665] [INFO] [logging.py:68:log_dist] [Rank 0] step=4650, skipped=5, lr=[7.911111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:49:44,666] [INFO] [timer.py:197:stop] 0/9300, RunningAvgSamplesPerSec=6.34015879446203, CurrSamplesPerSec=5.687757025009477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 7.911111111111111e-07, 'epoch': 19.7} +[2022-12-17 12:49:55,963] [INFO] [timer.py:197:stop] 0/9302, RunningAvgSamplesPerSec=6.340159094918814, CurrSamplesPerSec=5.705109291970435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:50:07,237] [INFO] [timer.py:197:stop] 0/9304, RunningAvgSamplesPerSec=6.340162530518254, CurrSamplesPerSec=5.725340585042284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:50:18,508] [INFO] [timer.py:197:stop] 0/9306, RunningAvgSamplesPerSec=6.340164719469053, CurrSamplesPerSec=5.717111982365678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:50:29,827] [INFO] [timer.py:197:stop] 0/9308, RunningAvgSamplesPerSec=6.340163259169347, CurrSamplesPerSec=5.684242802166537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:50:41,100] [INFO] [timer.py:197:stop] 0/9310, RunningAvgSamplesPerSec=6.340167314338288, CurrSamplesPerSec=5.725055586914081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:50:52,364] [INFO] [timer.py:197:stop] 0/9312, RunningAvgSamplesPerSec=6.340169757275838, CurrSamplesPerSec=5.718095505399457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:51:03,633] [INFO] [timer.py:197:stop] 0/9314, RunningAvgSamplesPerSec=6.34017535552695, CurrSamplesPerSec=5.7352133403440115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:51:14,926] [INFO] [timer.py:197:stop] 0/9316, RunningAvgSamplesPerSec=6.340176615455552, CurrSamplesPerSec=5.713768149566863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:51:26,259] [INFO] [timer.py:197:stop] 0/9318, RunningAvgSamplesPerSec=6.340175941131643, CurrSamplesPerSec=5.683389532316705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:51:37,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=4660, skipped=5, lr=[7.688888888888891e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:51:37,562] [INFO] [timer.py:197:stop] 0/9320, RunningAvgSamplesPerSec=6.3401753738723485, CurrSamplesPerSec=5.697270722570649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:51:48,845] [INFO] [timer.py:197:stop] 0/9322, RunningAvgSamplesPerSec=6.3401783804499825, CurrSamplesPerSec=5.707954791824532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:00,169] [INFO] [timer.py:197:stop] 0/9324, RunningAvgSamplesPerSec=6.340176036842008, CurrSamplesPerSec=5.676642689896154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:11,437] [INFO] [timer.py:197:stop] 0/9326, RunningAvgSamplesPerSec=6.340181403984736, CurrSamplesPerSec=5.719313321363565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:22,736] [INFO] [timer.py:197:stop] 0/9328, RunningAvgSamplesPerSec=6.3401792682857, CurrSamplesPerSec=5.675037425141788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:34,009] [INFO] [timer.py:197:stop] 0/9330, RunningAvgSamplesPerSec=6.3401827131228385, CurrSamplesPerSec=5.709366948867249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:45,315] [INFO] [timer.py:197:stop] 0/9332, RunningAvgSamplesPerSec=6.34018176622586, CurrSamplesPerSec=5.687114028058006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:52:56,714] [INFO] [timer.py:197:stop] 0/9334, RunningAvgSamplesPerSec=6.3401772493220765, CurrSamplesPerSec=5.684735865809839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:53:08,032] [INFO] [timer.py:197:stop] 0/9336, RunningAvgSamplesPerSec=6.3401745095874045, CurrSamplesPerSec=5.7041876849764614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:53:19,331] [INFO] [timer.py:197:stop] 0/9338, RunningAvgSamplesPerSec=6.340172921987333, CurrSamplesPerSec=5.710699863603781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:53:30,643] [INFO] [logging.py:68:log_dist] [Rank 0] step=4670, skipped=5, lr=[7.466666666666668e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:53:30,645] [INFO] [timer.py:197:stop] 0/9340, RunningAvgSamplesPerSec=6.34017212807068, CurrSamplesPerSec=5.681568796509319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:53:41,940] [INFO] [timer.py:197:stop] 0/9342, RunningAvgSamplesPerSec=6.340172906628067, CurrSamplesPerSec=5.725945353710456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:53:53,255] [INFO] [timer.py:197:stop] 0/9344, RunningAvgSamplesPerSec=6.3401717534789475, CurrSamplesPerSec=5.705783531440709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:54:04,544] [INFO] [timer.py:197:stop] 0/9346, RunningAvgSamplesPerSec=6.3401735765286045, CurrSamplesPerSec=5.724853638836918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:54:15,850] [INFO] [timer.py:197:stop] 0/9348, RunningAvgSamplesPerSec=6.340168712130555, CurrSamplesPerSec=5.684671338945308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:54:27,137] [INFO] [timer.py:197:stop] 0/9350, RunningAvgSamplesPerSec=6.340168979319973, CurrSamplesPerSec=5.704432787366835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 7.355555555555556e-07, 'epoch': 19.81} +[2022-12-17 12:54:38,478] [INFO] [timer.py:197:stop] 0/9352, RunningAvgSamplesPerSec=6.340164046274461, CurrSamplesPerSec=5.680551876945872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:54:49,801] [INFO] [timer.py:197:stop] 0/9354, RunningAvgSamplesPerSec=6.340160208992431, CurrSamplesPerSec=5.693153029852572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:01,089] [INFO] [timer.py:197:stop] 0/9356, RunningAvgSamplesPerSec=6.340161150460569, CurrSamplesPerSec=5.701699064564827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:12,396] [INFO] [timer.py:197:stop] 0/9358, RunningAvgSamplesPerSec=6.340158916026741, CurrSamplesPerSec=5.697199381370964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:23,677] [INFO] [logging.py:68:log_dist] [Rank 0] step=4680, skipped=5, lr=[7.244444444444446e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:55:23,679] [INFO] [timer.py:197:stop] 0/9360, RunningAvgSamplesPerSec=6.34016184496023, CurrSamplesPerSec=5.720007742725363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:34,926] [INFO] [timer.py:197:stop] 0/9362, RunningAvgSamplesPerSec=6.340168235538573, CurrSamplesPerSec=5.749692976105513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:46,320] [INFO] [timer.py:197:stop] 0/9364, RunningAvgSamplesPerSec=6.3401557681292, CurrSamplesPerSec=5.607521341696767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:55:57,593] [INFO] [timer.py:197:stop] 0/9366, RunningAvgSamplesPerSec=6.3401619815766015, CurrSamplesPerSec=5.740824544694479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:56:08,921] [INFO] [timer.py:197:stop] 0/9368, RunningAvgSamplesPerSec=6.340162699156885, CurrSamplesPerSec=5.677324385946494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:56:20,226] [INFO] [timer.py:197:stop] 0/9370, RunningAvgSamplesPerSec=6.340159813721217, CurrSamplesPerSec=5.69181477566186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:56:31,543] [INFO] [timer.py:197:stop] 0/9372, RunningAvgSamplesPerSec=6.340156868598053, CurrSamplesPerSec=5.6598232614155615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:56:42,837] [INFO] [timer.py:197:stop] 0/9374, RunningAvgSamplesPerSec=6.340157914042522, CurrSamplesPerSec=5.704102352588936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:56:54,307] [INFO] [timer.py:197:stop] 0/9376, RunningAvgSamplesPerSec=6.3401579124363225, CurrSamplesPerSec=5.707784389579081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:57:05,590] [INFO] [timer.py:197:stop] 0/9378, RunningAvgSamplesPerSec=6.340159614914527, CurrSamplesPerSec=5.71288119027779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:57:16,873] [INFO] [logging.py:68:log_dist] [Rank 0] step=4690, skipped=5, lr=[7.022222222222223e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:57:16,875] [INFO] [timer.py:197:stop] 0/9380, RunningAvgSamplesPerSec=6.340161045115699, CurrSamplesPerSec=5.714271214580349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:57:28,212] [INFO] [timer.py:197:stop] 0/9382, RunningAvgSamplesPerSec=6.340157356492733, CurrSamplesPerSec=5.691090985175394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:57:39,507] [INFO] [timer.py:197:stop] 0/9384, RunningAvgSamplesPerSec=6.340157980652823, CurrSamplesPerSec=5.712876326988239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:57:50,792] [INFO] [timer.py:197:stop] 0/9386, RunningAvgSamplesPerSec=6.340157660006358, CurrSamplesPerSec=5.6946078846918695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:02,061] [INFO] [timer.py:197:stop] 0/9388, RunningAvgSamplesPerSec=6.3401619432927445, CurrSamplesPerSec=5.723264680143104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:13,339] [INFO] [timer.py:197:stop] 0/9390, RunningAvgSamplesPerSec=6.340164537079148, CurrSamplesPerSec=5.731874497753184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:24,624] [INFO] [timer.py:197:stop] 0/9392, RunningAvgSamplesPerSec=6.340169133071809, CurrSamplesPerSec=5.724244706348782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:35,945] [INFO] [timer.py:197:stop] 0/9394, RunningAvgSamplesPerSec=6.3401673757243175, CurrSamplesPerSec=5.700526507026288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:47,278] [INFO] [timer.py:197:stop] 0/9396, RunningAvgSamplesPerSec=6.340163410174969, CurrSamplesPerSec=5.68879750339704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:58:58,578] [INFO] [timer.py:197:stop] 0/9398, RunningAvgSamplesPerSec=6.340164316536951, CurrSamplesPerSec=5.704421149981809, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:59:09,853] [INFO] [logging.py:68:log_dist] [Rank 0] step=4700, skipped=5, lr=[6.800000000000001e-07], mom=[[0.9, 0.999]] +[2022-12-17 12:59:09,855] [INFO] [timer.py:197:stop] 0/9400, RunningAvgSamplesPerSec=6.340164796341688, CurrSamplesPerSec=5.718083081378961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 6.800000000000001e-07, 'epoch': 19.92} +[2022-12-17 12:59:21,160] [INFO] [timer.py:197:stop] 0/9402, RunningAvgSamplesPerSec=6.340163757847574, CurrSamplesPerSec=5.702092688996432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:59:32,460] [INFO] [timer.py:197:stop] 0/9404, RunningAvgSamplesPerSec=6.34016305691172, CurrSamplesPerSec=5.712065489245712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:59:43,743] [INFO] [timer.py:197:stop] 0/9406, RunningAvgSamplesPerSec=6.3401647800453125, CurrSamplesPerSec=5.709922679480389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 12:59:55,041] [INFO] [timer.py:197:stop] 0/9408, RunningAvgSamplesPerSec=6.340165372569974, CurrSamplesPerSec=5.702819523059122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:00:06,357] [INFO] [timer.py:197:stop] 0/9410, RunningAvgSamplesPerSec=6.340163380924811, CurrSamplesPerSec=5.681837935931388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:00:17,682] [INFO] [timer.py:197:stop] 0/9412, RunningAvgSamplesPerSec=6.340159713715301, CurrSamplesPerSec=5.696154135396421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:00:28,954] [INFO] [timer.py:197:stop] 0/9414, RunningAvgSamplesPerSec=6.340163134519667, CurrSamplesPerSec=5.73100686895559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:00:40,268] [INFO] [timer.py:197:stop] 0/9416, RunningAvgSamplesPerSec=6.340160765431248, CurrSamplesPerSec=5.705356656289844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:00:51,568] [INFO] [timer.py:197:stop] 0/9418, RunningAvgSamplesPerSec=6.340159910382644, CurrSamplesPerSec=5.691176411349352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:02,868] [INFO] [logging.py:68:log_dist] [Rank 0] step=4710, skipped=5, lr=[6.577777777777779e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:01:02,870] [INFO] [timer.py:197:stop] 0/9420, RunningAvgSamplesPerSec=6.340158895996132, CurrSamplesPerSec=5.69437957089141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:14,169] [INFO] [timer.py:197:stop] 0/9422, RunningAvgSamplesPerSec=6.340159772010291, CurrSamplesPerSec=5.704893956969019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:25,476] [INFO] [timer.py:197:stop] 0/9424, RunningAvgSamplesPerSec=6.340158699659639, CurrSamplesPerSec=5.697029137049317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:36,784] [INFO] [timer.py:197:stop] 0/9426, RunningAvgSamplesPerSec=6.34015888231509, CurrSamplesPerSec=5.7065775435697255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:48,099] [INFO] [timer.py:197:stop] 0/9428, RunningAvgSamplesPerSec=6.340157566303402, CurrSamplesPerSec=5.6882292431636685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:01:59,383] [INFO] [timer.py:197:stop] 0/9430, RunningAvgSamplesPerSec=6.340160039398767, CurrSamplesPerSec=5.715575509259925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:02:10,680] [INFO] [timer.py:197:stop] 0/9432, RunningAvgSamplesPerSec=6.34015882945978, CurrSamplesPerSec=5.695347795512591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:02:21,962] [INFO] [timer.py:197:stop] 0/9434, RunningAvgSamplesPerSec=6.3401592538599445, CurrSamplesPerSec=5.703278493058004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:02:33,260] [INFO] [timer.py:197:stop] 0/9436, RunningAvgSamplesPerSec=6.340159679445317, CurrSamplesPerSec=5.71391117838237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:02:44,507] [INFO] [timer.py:197:stop] 0/9438, RunningAvgSamplesPerSec=6.3401666888108155, CurrSamplesPerSec=5.731557519685642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:02:53,004] [INFO] [logging.py:68:log_dist] [Rank 0] step=4720, skipped=5, lr=[6.355555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:02:53,006] [INFO] [timer.py:197:stop] 0/9440, RunningAvgSamplesPerSec=6.340495323266648, CurrSamplesPerSec=10.172010687840247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:03:04,333] [INFO] [timer.py:197:stop] 0/9442, RunningAvgSamplesPerSec=6.340491284144846, CurrSamplesPerSec=5.67950263607097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:03:15,606] [INFO] [timer.py:197:stop] 0/9444, RunningAvgSamplesPerSec=6.340495059977157, CurrSamplesPerSec=5.720786211810857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:03:26,942] [INFO] [timer.py:197:stop] 0/9446, RunningAvgSamplesPerSec=6.340490948343084, CurrSamplesPerSec=5.663866573890551, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:03:38,290] [INFO] [timer.py:197:stop] 0/9448, RunningAvgSamplesPerSec=6.340490033332966, CurrSamplesPerSec=5.692823417606247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:03:49,604] [INFO] [timer.py:197:stop] 0/9450, RunningAvgSamplesPerSec=6.340487672942547, CurrSamplesPerSec=5.679561758239619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 6.244444444444445e-07, 'epoch': 20.02} +[2022-12-17 13:04:00,945] [INFO] [timer.py:197:stop] 0/9452, RunningAvgSamplesPerSec=6.340482910496316, CurrSamplesPerSec=5.684355467373568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:04:12,236] [INFO] [timer.py:197:stop] 0/9454, RunningAvgSamplesPerSec=6.34048233937946, CurrSamplesPerSec=5.706420324723856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:04:23,487] [INFO] [timer.py:197:stop] 0/9456, RunningAvgSamplesPerSec=6.340486824333612, CurrSamplesPerSec=5.7355645464489875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:04:34,766] [INFO] [timer.py:197:stop] 0/9458, RunningAvgSamplesPerSec=6.34048767797844, CurrSamplesPerSec=5.698039628069896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:04:46,018] [INFO] [logging.py:68:log_dist] [Rank 0] step=4730, skipped=5, lr=[6.133333333333333e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:04:46,020] [INFO] [timer.py:197:stop] 0/9460, RunningAvgSamplesPerSec=6.340492541433014, CurrSamplesPerSec=5.735454008695971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:04:57,292] [INFO] [timer.py:197:stop] 0/9462, RunningAvgSamplesPerSec=6.340493819560258, CurrSamplesPerSec=5.710972499087195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:05:08,606] [INFO] [timer.py:197:stop] 0/9464, RunningAvgSamplesPerSec=6.34049219161994, CurrSamplesPerSec=5.696880907027384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:05:19,873] [INFO] [timer.py:197:stop] 0/9466, RunningAvgSamplesPerSec=6.340496758985929, CurrSamplesPerSec=5.7248873365583535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:05:31,147] [INFO] [timer.py:197:stop] 0/9468, RunningAvgSamplesPerSec=6.340498220648656, CurrSamplesPerSec=5.710878215779455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:05:42,436] [INFO] [timer.py:197:stop] 0/9470, RunningAvgSamplesPerSec=6.340497412838961, CurrSamplesPerSec=5.704533404238147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:05:53,775] [INFO] [timer.py:197:stop] 0/9472, RunningAvgSamplesPerSec=6.340491861896453, CurrSamplesPerSec=5.657351254286605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:06:05,054] [INFO] [timer.py:197:stop] 0/9474, RunningAvgSamplesPerSec=6.340493855901343, CurrSamplesPerSec=5.727688808571485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:06:16,331] [INFO] [timer.py:197:stop] 0/9476, RunningAvgSamplesPerSec=6.340496368385586, CurrSamplesPerSec=5.70959112263088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:06:27,652] [INFO] [timer.py:197:stop] 0/9478, RunningAvgSamplesPerSec=6.34049401426013, CurrSamplesPerSec=5.69632650324217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:06:38,925] [INFO] [logging.py:68:log_dist] [Rank 0] step=4740, skipped=5, lr=[5.911111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:06:38,927] [INFO] [timer.py:197:stop] 0/9480, RunningAvgSamplesPerSec=6.340497114118926, CurrSamplesPerSec=5.710877486796936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:06:50,210] [INFO] [timer.py:197:stop] 0/9482, RunningAvgSamplesPerSec=6.340499386506941, CurrSamplesPerSec=5.705616897005835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:01,460] [INFO] [timer.py:197:stop] 0/9484, RunningAvgSamplesPerSec=6.340505909929958, CurrSamplesPerSec=5.74067329032332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:12,722] [INFO] [timer.py:197:stop] 0/9486, RunningAvgSamplesPerSec=6.340509912791447, CurrSamplesPerSec=5.723496536483985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:24,014] [INFO] [timer.py:197:stop] 0/9488, RunningAvgSamplesPerSec=6.340508227847923, CurrSamplesPerSec=5.67338486498578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:35,313] [INFO] [timer.py:197:stop] 0/9490, RunningAvgSamplesPerSec=6.3405131586608485, CurrSamplesPerSec=5.720401704983039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:46,561] [INFO] [timer.py:197:stop] 0/9492, RunningAvgSamplesPerSec=6.340517223218006, CurrSamplesPerSec=5.7341834981743185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:07:57,791] [INFO] [timer.py:197:stop] 0/9494, RunningAvgSamplesPerSec=6.340525965766664, CurrSamplesPerSec=5.73762363116758, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:08:09,054] [INFO] [timer.py:197:stop] 0/9496, RunningAvgSamplesPerSec=6.340529051682444, CurrSamplesPerSec=5.731980736084711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:08:20,372] [INFO] [timer.py:197:stop] 0/9498, RunningAvgSamplesPerSec=6.340527546537201, CurrSamplesPerSec=5.694462438228334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:08:31,637] [INFO] [logging.py:68:log_dist] [Rank 0] step=4750, skipped=5, lr=[5.68888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:08:31,638] [INFO] [timer.py:197:stop] 0/9500, RunningAvgSamplesPerSec=6.340532537658078, CurrSamplesPerSec=5.741129778677454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 5.68888888888889e-07, 'epoch': 20.13} +[2022-12-17 13:08:42,927] [INFO] [timer.py:197:stop] 0/9502, RunningAvgSamplesPerSec=6.34053487353236, CurrSamplesPerSec=5.709036670739029, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:08:54,189] [INFO] [timer.py:197:stop] 0/9504, RunningAvgSamplesPerSec=6.340539638784326, CurrSamplesPerSec=5.706077530334509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:09:05,432] [INFO] [timer.py:197:stop] 0/9506, RunningAvgSamplesPerSec=6.340543448414384, CurrSamplesPerSec=5.7210751745464705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:09:16,749] [INFO] [timer.py:197:stop] 0/9508, RunningAvgSamplesPerSec=6.340541355451662, CurrSamplesPerSec=5.687145114146095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:09:28,055] [INFO] [timer.py:197:stop] 0/9510, RunningAvgSamplesPerSec=6.3405402884819715, CurrSamplesPerSec=5.715332125522094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:09:39,399] [INFO] [timer.py:197:stop] 0/9512, RunningAvgSamplesPerSec=6.340534225994082, CurrSamplesPerSec=5.662470382483525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:09:50,688] [INFO] [timer.py:197:stop] 0/9514, RunningAvgSamplesPerSec=6.340533705118219, CurrSamplesPerSec=5.699211404662409, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:01,949] [INFO] [timer.py:197:stop] 0/9516, RunningAvgSamplesPerSec=6.340536044772183, CurrSamplesPerSec=5.72302137321579, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:13,239] [INFO] [timer.py:197:stop] 0/9518, RunningAvgSamplesPerSec=6.340536540174654, CurrSamplesPerSec=5.723779914847205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:24,570] [INFO] [logging.py:68:log_dist] [Rank 0] step=4760, skipped=5, lr=[5.466666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:10:24,572] [INFO] [timer.py:197:stop] 0/9520, RunningAvgSamplesPerSec=6.3405322503302814, CurrSamplesPerSec=5.682484792575845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:35,875] [INFO] [timer.py:197:stop] 0/9522, RunningAvgSamplesPerSec=6.3405315251666785, CurrSamplesPerSec=5.702043755493652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:47,179] [INFO] [timer.py:197:stop] 0/9524, RunningAvgSamplesPerSec=6.340530874797334, CurrSamplesPerSec=5.703819707289909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:10:58,484] [INFO] [timer.py:197:stop] 0/9526, RunningAvgSamplesPerSec=6.340529822737708, CurrSamplesPerSec=5.689387582292352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:11:09,804] [INFO] [timer.py:197:stop] 0/9528, RunningAvgSamplesPerSec=6.340528232367791, CurrSamplesPerSec=5.692070885675692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:11:21,133] [INFO] [timer.py:197:stop] 0/9530, RunningAvgSamplesPerSec=6.340525136810104, CurrSamplesPerSec=5.682145108321376, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:11:32,444] [INFO] [timer.py:197:stop] 0/9532, RunningAvgSamplesPerSec=6.340524288841033, CurrSamplesPerSec=5.710917824109433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:11:43,753] [INFO] [timer.py:197:stop] 0/9534, RunningAvgSamplesPerSec=6.3405241001501285, CurrSamplesPerSec=5.6932148514914065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:11:55,061] [INFO] [timer.py:197:stop] 0/9536, RunningAvgSamplesPerSec=6.340523159885818, CurrSamplesPerSec=5.70686944041714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:12:06,347] [INFO] [timer.py:197:stop] 0/9538, RunningAvgSamplesPerSec=6.340524138848755, CurrSamplesPerSec=5.707079100468829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:12:17,633] [INFO] [logging.py:68:log_dist] [Rank 0] step=4770, skipped=5, lr=[5.244444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:12:17,634] [INFO] [timer.py:197:stop] 0/9540, RunningAvgSamplesPerSec=6.340528219980291, CurrSamplesPerSec=5.720528974189757, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:12:28,913] [INFO] [timer.py:197:stop] 0/9542, RunningAvgSamplesPerSec=6.340531213151512, CurrSamplesPerSec=5.716052358086121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:12:40,206] [INFO] [timer.py:197:stop] 0/9544, RunningAvgSamplesPerSec=6.340532220953055, CurrSamplesPerSec=5.7087142007328815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:12:51,456] [INFO] [timer.py:197:stop] 0/9546, RunningAvgSamplesPerSec=6.340539098378529, CurrSamplesPerSec=5.724794302509103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:13:02,844] [INFO] [timer.py:197:stop] 0/9548, RunningAvgSamplesPerSec=6.340537904218002, CurrSamplesPerSec=5.713264443529819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:13:14,112] [INFO] [timer.py:197:stop] 0/9550, RunningAvgSamplesPerSec=6.340542398990786, CurrSamplesPerSec=5.715218229052894, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 5.133333333333334e-07, 'epoch': 20.23} +[2022-12-17 13:13:25,382] [INFO] [timer.py:197:stop] 0/9552, RunningAvgSamplesPerSec=6.3405453106084675, CurrSamplesPerSec=5.710071103117879, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:13:36,655] [INFO] [timer.py:197:stop] 0/9554, RunningAvgSamplesPerSec=6.3405490647928, CurrSamplesPerSec=5.730727178909781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:13:47,927] [INFO] [timer.py:197:stop] 0/9556, RunningAvgSamplesPerSec=6.340553842919432, CurrSamplesPerSec=5.7252531532934965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:13:59,181] [INFO] [timer.py:197:stop] 0/9558, RunningAvgSamplesPerSec=6.340556461041022, CurrSamplesPerSec=5.717279532461573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:14:10,382] [INFO] [logging.py:68:log_dist] [Rank 0] step=4780, skipped=5, lr=[5.022222222222222e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:14:10,383] [INFO] [timer.py:197:stop] 0/9560, RunningAvgSamplesPerSec=6.340566945139066, CurrSamplesPerSec=5.766731215163039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:14:21,647] [INFO] [timer.py:197:stop] 0/9562, RunningAvgSamplesPerSec=6.340572386150829, CurrSamplesPerSec=5.734300356731472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:14:32,907] [INFO] [timer.py:197:stop] 0/9564, RunningAvgSamplesPerSec=6.340577394733058, CurrSamplesPerSec=5.722657793597098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:14:44,137] [INFO] [timer.py:197:stop] 0/9566, RunningAvgSamplesPerSec=6.34058489836147, CurrSamplesPerSec=5.747707676283041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:14:55,419] [INFO] [timer.py:197:stop] 0/9568, RunningAvgSamplesPerSec=6.340586714162821, CurrSamplesPerSec=5.719065963822951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:15:06,726] [INFO] [timer.py:197:stop] 0/9570, RunningAvgSamplesPerSec=6.340587735441634, CurrSamplesPerSec=5.710436486486199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:15:18,005] [INFO] [timer.py:197:stop] 0/9572, RunningAvgSamplesPerSec=6.340588577229494, CurrSamplesPerSec=5.709074796483692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:15:29,241] [INFO] [timer.py:197:stop] 0/9574, RunningAvgSamplesPerSec=6.340596598155912, CurrSamplesPerSec=5.740768560004644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:15:40,507] [INFO] [timer.py:197:stop] 0/9576, RunningAvgSamplesPerSec=6.3406009572890945, CurrSamplesPerSec=5.72478258189847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:15:51,771] [INFO] [timer.py:197:stop] 0/9578, RunningAvgSamplesPerSec=6.340606231269369, CurrSamplesPerSec=5.709940412176869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:03,031] [INFO] [logging.py:68:log_dist] [Rank 0] step=4790, skipped=5, lr=[4.800000000000001e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:16:03,033] [INFO] [timer.py:197:stop] 0/9580, RunningAvgSamplesPerSec=6.340611041332799, CurrSamplesPerSec=5.738459406638726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:14,353] [INFO] [timer.py:197:stop] 0/9582, RunningAvgSamplesPerSec=6.340610755927412, CurrSamplesPerSec=5.708338355468865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:25,645] [INFO] [timer.py:197:stop] 0/9584, RunningAvgSamplesPerSec=6.340613416570764, CurrSamplesPerSec=5.708610765511207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:36,934] [INFO] [timer.py:197:stop] 0/9586, RunningAvgSamplesPerSec=6.340618523642324, CurrSamplesPerSec=5.722949629686229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:48,199] [INFO] [timer.py:197:stop] 0/9588, RunningAvgSamplesPerSec=6.3406237510816394, CurrSamplesPerSec=5.724449785421197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:16:59,446] [INFO] [timer.py:197:stop] 0/9590, RunningAvgSamplesPerSec=6.340630585642387, CurrSamplesPerSec=5.754374182993225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:17:10,803] [INFO] [timer.py:197:stop] 0/9592, RunningAvgSamplesPerSec=6.340623210045512, CurrSamplesPerSec=5.702429432409602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:17:22,070] [INFO] [timer.py:197:stop] 0/9594, RunningAvgSamplesPerSec=6.3406282526534286, CurrSamplesPerSec=5.7228803281207705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:17:33,367] [INFO] [timer.py:197:stop] 0/9596, RunningAvgSamplesPerSec=6.340630603611362, CurrSamplesPerSec=5.723601488161577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:17:44,622] [INFO] [timer.py:197:stop] 0/9598, RunningAvgSamplesPerSec=6.340631843955927, CurrSamplesPerSec=5.724446123166047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:17:55,858] [INFO] [logging.py:68:log_dist] [Rank 0] step=4800, skipped=5, lr=[4.5777777777777784e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:17:55,859] [INFO] [timer.py:197:stop] 0/9600, RunningAvgSamplesPerSec=6.340637832959923, CurrSamplesPerSec=5.731995668516094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 4.5777777777777784e-07, 'epoch': 20.34} +[2022-12-17 13:18:07,153] [INFO] [timer.py:197:stop] 0/9602, RunningAvgSamplesPerSec=6.340640425084899, CurrSamplesPerSec=5.713372181970451, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:18:18,399] [INFO] [timer.py:197:stop] 0/9604, RunningAvgSamplesPerSec=6.34064592644466, CurrSamplesPerSec=5.708614650334416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:18:29,611] [INFO] [timer.py:197:stop] 0/9606, RunningAvgSamplesPerSec=6.340654324601269, CurrSamplesPerSec=5.739666031481032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:18:40,908] [INFO] [timer.py:197:stop] 0/9608, RunningAvgSamplesPerSec=6.340654018292477, CurrSamplesPerSec=5.700855559061133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:18:52,175] [INFO] [timer.py:197:stop] 0/9610, RunningAvgSamplesPerSec=6.34065773956752, CurrSamplesPerSec=5.725173538996036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:03,445] [INFO] [timer.py:197:stop] 0/9612, RunningAvgSamplesPerSec=6.34066179110196, CurrSamplesPerSec=5.728430251233179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:14,699] [INFO] [timer.py:197:stop] 0/9614, RunningAvgSamplesPerSec=6.340667541111354, CurrSamplesPerSec=5.733595602853228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:25,955] [INFO] [timer.py:197:stop] 0/9616, RunningAvgSamplesPerSec=6.340673556128894, CurrSamplesPerSec=5.7353750907785255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:37,225] [INFO] [timer.py:197:stop] 0/9618, RunningAvgSamplesPerSec=6.340677615136063, CurrSamplesPerSec=5.725240942367918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:48,470] [INFO] [logging.py:68:log_dist] [Rank 0] step=4810, skipped=5, lr=[4.355555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:19:48,472] [INFO] [timer.py:197:stop] 0/9620, RunningAvgSamplesPerSec=6.340680049567392, CurrSamplesPerSec=5.722354764544266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:19:59,793] [INFO] [timer.py:197:stop] 0/9622, RunningAvgSamplesPerSec=6.340678324203684, CurrSamplesPerSec=5.682935923325895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:20:11,140] [INFO] [timer.py:197:stop] 0/9624, RunningAvgSamplesPerSec=6.340671645715207, CurrSamplesPerSec=5.643411426373468, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:20:22,407] [INFO] [timer.py:197:stop] 0/9626, RunningAvgSamplesPerSec=6.340675981803199, CurrSamplesPerSec=5.716829263846316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:20:33,636] [INFO] [timer.py:197:stop] 0/9628, RunningAvgSamplesPerSec=6.340684535616191, CurrSamplesPerSec=5.735601801817971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:20:44,887] [INFO] [timer.py:197:stop] 0/9630, RunningAvgSamplesPerSec=6.340690407688014, CurrSamplesPerSec=5.740077680962519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:20:56,211] [INFO] [timer.py:197:stop] 0/9632, RunningAvgSamplesPerSec=6.340686808726378, CurrSamplesPerSec=5.70311176257181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:21:07,481] [INFO] [timer.py:197:stop] 0/9634, RunningAvgSamplesPerSec=6.340688182776883, CurrSamplesPerSec=5.714741032735308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:21:18,788] [INFO] [timer.py:197:stop] 0/9636, RunningAvgSamplesPerSec=6.340689150997104, CurrSamplesPerSec=5.701154862332323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:21:30,068] [INFO] [timer.py:197:stop] 0/9638, RunningAvgSamplesPerSec=6.340689748082774, CurrSamplesPerSec=5.7023276784080865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:21:41,358] [INFO] [logging.py:68:log_dist] [Rank 0] step=4820, skipped=5, lr=[4.133333333333334e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:21:41,360] [INFO] [timer.py:197:stop] 0/9640, RunningAvgSamplesPerSec=6.340691169588604, CurrSamplesPerSec=5.722409902708005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:21:52,645] [INFO] [timer.py:197:stop] 0/9642, RunningAvgSamplesPerSec=6.340693182885456, CurrSamplesPerSec=5.718977748871354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:22:03,898] [INFO] [timer.py:197:stop] 0/9644, RunningAvgSamplesPerSec=6.340695317813755, CurrSamplesPerSec=5.735734160332338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:22:15,172] [INFO] [timer.py:197:stop] 0/9646, RunningAvgSamplesPerSec=6.340698505054053, CurrSamplesPerSec=5.725852529541198, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:22:26,496] [INFO] [timer.py:197:stop] 0/9648, RunningAvgSamplesPerSec=6.340696415651516, CurrSamplesPerSec=5.709592579939372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:22:37,825] [INFO] [timer.py:197:stop] 0/9650, RunningAvgSamplesPerSec=6.340692969068761, CurrSamplesPerSec=5.689682788244413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 4.0222222222222224e-07, 'epoch': 20.44} +[2022-12-17 13:22:49,132] [INFO] [timer.py:197:stop] 0/9652, RunningAvgSamplesPerSec=6.3406922739664155, CurrSamplesPerSec=5.686778127201508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:00,392] [INFO] [timer.py:197:stop] 0/9654, RunningAvgSamplesPerSec=6.340693233482764, CurrSamplesPerSec=5.707892164175741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:11,681] [INFO] [timer.py:197:stop] 0/9656, RunningAvgSamplesPerSec=6.340694915186403, CurrSamplesPerSec=5.702618898345167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:22,958] [INFO] [timer.py:197:stop] 0/9658, RunningAvgSamplesPerSec=6.340696549072195, CurrSamplesPerSec=5.713376073276843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:34,305] [INFO] [logging.py:68:log_dist] [Rank 0] step=4830, skipped=5, lr=[3.9111111111111115e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:23:34,307] [INFO] [timer.py:197:stop] 0/9660, RunningAvgSamplesPerSec=6.34068979430831, CurrSamplesPerSec=5.641418446918364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:45,571] [INFO] [timer.py:197:stop] 0/9662, RunningAvgSamplesPerSec=6.340692254869143, CurrSamplesPerSec=5.731172297788931, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:23:56,829] [INFO] [timer.py:197:stop] 0/9664, RunningAvgSamplesPerSec=6.340697391418571, CurrSamplesPerSec=5.741566199321276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:24:08,156] [INFO] [timer.py:197:stop] 0/9666, RunningAvgSamplesPerSec=6.340694087279765, CurrSamplesPerSec=5.677179821010574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:24:19,489] [INFO] [timer.py:197:stop] 0/9668, RunningAvgSamplesPerSec=6.34068814444375, CurrSamplesPerSec=5.676951461835517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:24:30,789] [INFO] [timer.py:197:stop] 0/9670, RunningAvgSamplesPerSec=6.340686576979262, CurrSamplesPerSec=5.686611637253284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:24:42,123] [INFO] [timer.py:197:stop] 0/9672, RunningAvgSamplesPerSec=6.340682922114108, CurrSamplesPerSec=5.673882282916007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:24:53,463] [INFO] [timer.py:197:stop] 0/9674, RunningAvgSamplesPerSec=6.340678183937235, CurrSamplesPerSec=5.659441894190174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:25:04,793] [INFO] [timer.py:197:stop] 0/9676, RunningAvgSamplesPerSec=6.340674849212192, CurrSamplesPerSec=5.671807087681444, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:25:16,103] [INFO] [timer.py:197:stop] 0/9678, RunningAvgSamplesPerSec=6.340673436963481, CurrSamplesPerSec=5.701336493716402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:25:27,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=4840, skipped=5, lr=[3.6888888888888893e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:25:27,469] [INFO] [timer.py:197:stop] 0/9680, RunningAvgSamplesPerSec=6.340664750330204, CurrSamplesPerSec=5.628685730796901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:25:38,774] [INFO] [timer.py:197:stop] 0/9682, RunningAvgSamplesPerSec=6.340664072347203, CurrSamplesPerSec=5.694476934220253, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:25:50,093] [INFO] [timer.py:197:stop] 0/9684, RunningAvgSamplesPerSec=6.340661364452313, CurrSamplesPerSec=5.674273035404013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:01,484] [INFO] [timer.py:197:stop] 0/9686, RunningAvgSamplesPerSec=6.3406516390929175, CurrSamplesPerSec=5.6439826334632945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:12,815] [INFO] [timer.py:197:stop] 0/9688, RunningAvgSamplesPerSec=6.340648878300982, CurrSamplesPerSec=5.679193105947094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:24,108] [INFO] [timer.py:197:stop] 0/9690, RunningAvgSamplesPerSec=6.34064983052059, CurrSamplesPerSec=5.707535843863806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:35,408] [INFO] [timer.py:197:stop] 0/9692, RunningAvgSamplesPerSec=6.340648029931281, CurrSamplesPerSec=5.687791733628178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:46,714] [INFO] [timer.py:197:stop] 0/9694, RunningAvgSamplesPerSec=6.340645924950521, CurrSamplesPerSec=5.679688899105148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:26:58,001] [INFO] [timer.py:197:stop] 0/9696, RunningAvgSamplesPerSec=6.340645967687609, CurrSamplesPerSec=5.679108519733628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:27:09,299] [INFO] [timer.py:197:stop] 0/9698, RunningAvgSamplesPerSec=6.3406472989057505, CurrSamplesPerSec=5.712760339986075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:27:20,604] [INFO] [logging.py:68:log_dist] [Rank 0] step=4850, skipped=5, lr=[3.466666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:27:20,606] [INFO] [timer.py:197:stop] 0/9700, RunningAvgSamplesPerSec=6.340648066938377, CurrSamplesPerSec=5.688963156833746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 3.466666666666667e-07, 'epoch': 20.55} +[2022-12-17 13:27:31,925] [INFO] [timer.py:197:stop] 0/9702, RunningAvgSamplesPerSec=6.340644614174266, CurrSamplesPerSec=5.69075485566726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:27:43,348] [INFO] [timer.py:197:stop] 0/9704, RunningAvgSamplesPerSec=6.340641727402441, CurrSamplesPerSec=5.684615240287061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:27:54,685] [INFO] [timer.py:197:stop] 0/9706, RunningAvgSamplesPerSec=6.340637493457199, CurrSamplesPerSec=5.689613084076415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:28:05,987] [INFO] [timer.py:197:stop] 0/9708, RunningAvgSamplesPerSec=6.3406360614694695, CurrSamplesPerSec=5.70824682958766, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:28:17,259] [INFO] [timer.py:197:stop] 0/9710, RunningAvgSamplesPerSec=6.340639036202918, CurrSamplesPerSec=5.72194516517078, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:28:28,566] [INFO] [timer.py:197:stop] 0/9712, RunningAvgSamplesPerSec=6.340639594011932, CurrSamplesPerSec=5.708637959384705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:28:39,868] [INFO] [timer.py:197:stop] 0/9714, RunningAvgSamplesPerSec=6.340638689416856, CurrSamplesPerSec=5.715024031439892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:28:51,161] [INFO] [timer.py:197:stop] 0/9716, RunningAvgSamplesPerSec=6.340639113239258, CurrSamplesPerSec=5.6984850060904595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:02,418] [INFO] [timer.py:197:stop] 0/9718, RunningAvgSamplesPerSec=6.340642093499504, CurrSamplesPerSec=5.738537673397883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:13,689] [INFO] [logging.py:68:log_dist] [Rank 0] step=4860, skipped=5, lr=[3.2444444444444447e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:29:13,690] [INFO] [timer.py:197:stop] 0/9720, RunningAvgSamplesPerSec=6.340644162013655, CurrSamplesPerSec=5.708898742362166, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:25,006] [INFO] [timer.py:197:stop] 0/9722, RunningAvgSamplesPerSec=6.340642235669002, CurrSamplesPerSec=5.686858604853792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:36,299] [INFO] [timer.py:197:stop] 0/9724, RunningAvgSamplesPerSec=6.340642705245801, CurrSamplesPerSec=5.6947743599454475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:47,574] [INFO] [timer.py:197:stop] 0/9726, RunningAvgSamplesPerSec=6.340645698121382, CurrSamplesPerSec=5.712487533780942, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:29:58,862] [INFO] [timer.py:197:stop] 0/9728, RunningAvgSamplesPerSec=6.340647725574473, CurrSamplesPerSec=5.708532098978827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:30:10,148] [INFO] [timer.py:197:stop] 0/9730, RunningAvgSamplesPerSec=6.340650054631584, CurrSamplesPerSec=5.690818555554048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:30:21,405] [INFO] [timer.py:197:stop] 0/9732, RunningAvgSamplesPerSec=6.340652812405148, CurrSamplesPerSec=5.715923340598525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:30:32,703] [INFO] [timer.py:197:stop] 0/9734, RunningAvgSamplesPerSec=6.340653639196751, CurrSamplesPerSec=5.699098875774059, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:30:43,977] [INFO] [timer.py:197:stop] 0/9736, RunningAvgSamplesPerSec=6.340657923589717, CurrSamplesPerSec=5.703578535551888, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:30:55,247] [INFO] [timer.py:197:stop] 0/9738, RunningAvgSamplesPerSec=6.3406613332583, CurrSamplesPerSec=5.716524660457944, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:31:06,551] [INFO] [logging.py:68:log_dist] [Rank 0] step=4870, skipped=5, lr=[3.0222222222222225e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:31:06,553] [INFO] [timer.py:197:stop] 0/9740, RunningAvgSamplesPerSec=6.340660083477266, CurrSamplesPerSec=5.70610736854931, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:31:17,824] [INFO] [timer.py:197:stop] 0/9742, RunningAvgSamplesPerSec=6.340663747945913, CurrSamplesPerSec=5.729440418168294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:31:29,111] [INFO] [timer.py:197:stop] 0/9744, RunningAvgSamplesPerSec=6.340663146217899, CurrSamplesPerSec=5.710007943180077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:31:40,451] [INFO] [timer.py:197:stop] 0/9746, RunningAvgSamplesPerSec=6.3406584805563115, CurrSamplesPerSec=5.691350167412572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:31:51,768] [INFO] [timer.py:197:stop] 0/9748, RunningAvgSamplesPerSec=6.340656735987837, CurrSamplesPerSec=5.690681747327503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:32:03,069] [INFO] [timer.py:197:stop] 0/9750, RunningAvgSamplesPerSec=6.340654303692601, CurrSamplesPerSec=5.686689218910566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.9111111111111116e-07, 'epoch': 20.66} +[2022-12-17 13:32:14,345] [INFO] [timer.py:197:stop] 0/9752, RunningAvgSamplesPerSec=6.340657376788333, CurrSamplesPerSec=5.725202844614887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:32:25,650] [INFO] [timer.py:197:stop] 0/9754, RunningAvgSamplesPerSec=6.340656556812783, CurrSamplesPerSec=5.690480528132339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:32:36,961] [INFO] [timer.py:197:stop] 0/9756, RunningAvgSamplesPerSec=6.340655173128515, CurrSamplesPerSec=5.7164781571213945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:32:48,279] [INFO] [timer.py:197:stop] 0/9758, RunningAvgSamplesPerSec=6.340652418983041, CurrSamplesPerSec=5.6877141218856835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:32:59,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=4880, skipped=5, lr=[2.8e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:32:59,608] [INFO] [timer.py:197:stop] 0/9760, RunningAvgSamplesPerSec=6.340648595255576, CurrSamplesPerSec=5.672382619376683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:33:10,911] [INFO] [timer.py:197:stop] 0/9762, RunningAvgSamplesPerSec=6.34064813649814, CurrSamplesPerSec=5.704121018830548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:33:22,203] [INFO] [timer.py:197:stop] 0/9764, RunningAvgSamplesPerSec=6.340646665771141, CurrSamplesPerSec=5.699006194026974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:33:33,517] [INFO] [timer.py:197:stop] 0/9766, RunningAvgSamplesPerSec=6.3406455096689545, CurrSamplesPerSec=5.711425005086199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:33:44,847] [INFO] [timer.py:197:stop] 0/9768, RunningAvgSamplesPerSec=6.340641344518732, CurrSamplesPerSec=5.690235899501322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:33:56,142] [INFO] [timer.py:197:stop] 0/9770, RunningAvgSamplesPerSec=6.340640091729031, CurrSamplesPerSec=5.7135424315821295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:34:07,454] [INFO] [timer.py:197:stop] 0/9772, RunningAvgSamplesPerSec=6.340638459983171, CurrSamplesPerSec=5.700314422978219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:34:18,735] [INFO] [timer.py:197:stop] 0/9774, RunningAvgSamplesPerSec=6.3406391161081554, CurrSamplesPerSec=5.708320147168445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:34:30,037] [INFO] [timer.py:197:stop] 0/9776, RunningAvgSamplesPerSec=6.3406391673357545, CurrSamplesPerSec=5.71570937915154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:34:41,318] [INFO] [timer.py:197:stop] 0/9778, RunningAvgSamplesPerSec=6.340641663527955, CurrSamplesPerSec=5.721050056795874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:34:52,627] [INFO] [logging.py:68:log_dist] [Rank 0] step=4890, skipped=5, lr=[2.577777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:34:52,628] [INFO] [timer.py:197:stop] 0/9780, RunningAvgSamplesPerSec=6.3406400030490335, CurrSamplesPerSec=5.697978669195069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:35:03,925] [INFO] [timer.py:197:stop] 0/9782, RunningAvgSamplesPerSec=6.34064038120064, CurrSamplesPerSec=5.702911844245921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:35:15,185] [INFO] [timer.py:197:stop] 0/9784, RunningAvgSamplesPerSec=6.340640695137145, CurrSamplesPerSec=5.707185149671572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:35:26,524] [INFO] [timer.py:197:stop] 0/9786, RunningAvgSamplesPerSec=6.340635597279186, CurrSamplesPerSec=5.680664876745854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:35:37,820] [INFO] [timer.py:197:stop] 0/9788, RunningAvgSamplesPerSec=6.340635829083338, CurrSamplesPerSec=5.7053639320357314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:35:49,138] [INFO] [timer.py:197:stop] 0/9790, RunningAvgSamplesPerSec=6.3406354978534845, CurrSamplesPerSec=5.703496372150302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:36:00,446] [INFO] [timer.py:197:stop] 0/9792, RunningAvgSamplesPerSec=6.340636871356399, CurrSamplesPerSec=5.712987455221542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:36:11,795] [INFO] [timer.py:197:stop] 0/9794, RunningAvgSamplesPerSec=6.340633147060947, CurrSamplesPerSec=5.656989532874886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:36:23,176] [INFO] [timer.py:197:stop] 0/9796, RunningAvgSamplesPerSec=6.340631705921807, CurrSamplesPerSec=5.691418948785352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:36:34,507] [INFO] [timer.py:197:stop] 0/9798, RunningAvgSamplesPerSec=6.340626935726591, CurrSamplesPerSec=5.682324568171797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:36:45,815] [INFO] [logging.py:68:log_dist] [Rank 0] step=4900, skipped=5, lr=[2.3555555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:36:45,816] [INFO] [timer.py:197:stop] 0/9800, RunningAvgSamplesPerSec=6.340622782064439, CurrSamplesPerSec=5.689119174059178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.3555555555555556e-07, 'epoch': 20.76} +[2022-12-17 13:36:57,143] [INFO] [timer.py:197:stop] 0/9802, RunningAvgSamplesPerSec=6.340620265372528, CurrSamplesPerSec=5.685125465837036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:37:08,478] [INFO] [timer.py:197:stop] 0/9804, RunningAvgSamplesPerSec=6.34061955082465, CurrSamplesPerSec=5.71378006836133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:37:19,836] [INFO] [timer.py:197:stop] 0/9806, RunningAvgSamplesPerSec=6.3406180901346625, CurrSamplesPerSec=5.7119497782507125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:37:31,216] [INFO] [timer.py:197:stop] 0/9808, RunningAvgSamplesPerSec=6.340614073759756, CurrSamplesPerSec=5.673192061048671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:37:42,606] [INFO] [timer.py:197:stop] 0/9810, RunningAvgSamplesPerSec=6.340608655859164, CurrSamplesPerSec=5.669230971978268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:37:53,967] [INFO] [timer.py:197:stop] 0/9812, RunningAvgSamplesPerSec=6.340606529412413, CurrSamplesPerSec=5.6976468048680005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:38:05,336] [INFO] [timer.py:197:stop] 0/9814, RunningAvgSamplesPerSec=6.3406031791855275, CurrSamplesPerSec=5.703918120913467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:38:16,681] [INFO] [timer.py:197:stop] 0/9816, RunningAvgSamplesPerSec=6.340602157159592, CurrSamplesPerSec=5.713201456120576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:38:28,014] [INFO] [timer.py:197:stop] 0/9818, RunningAvgSamplesPerSec=6.340599736519701, CurrSamplesPerSec=5.718833247938542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:38:39,341] [INFO] [logging.py:68:log_dist] [Rank 0] step=4910, skipped=5, lr=[2.1333333333333334e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:38:39,343] [INFO] [timer.py:197:stop] 0/9820, RunningAvgSamplesPerSec=6.340597847266216, CurrSamplesPerSec=5.714028671694887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:38:50,720] [INFO] [timer.py:197:stop] 0/9822, RunningAvgSamplesPerSec=6.340593712928795, CurrSamplesPerSec=5.680739170553177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:02,079] [INFO] [timer.py:197:stop] 0/9824, RunningAvgSamplesPerSec=6.340594575940506, CurrSamplesPerSec=5.711247590859075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:13,423] [INFO] [timer.py:197:stop] 0/9826, RunningAvgSamplesPerSec=6.340595406220354, CurrSamplesPerSec=5.713545836677856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:24,773] [INFO] [timer.py:197:stop] 0/9828, RunningAvgSamplesPerSec=6.340595171840303, CurrSamplesPerSec=5.713653099270805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:36,105] [INFO] [timer.py:197:stop] 0/9830, RunningAvgSamplesPerSec=6.340596883021344, CurrSamplesPerSec=5.7223742823568395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:47,522] [INFO] [timer.py:197:stop] 0/9832, RunningAvgSamplesPerSec=6.340594647328931, CurrSamplesPerSec=5.67701365247189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:39:58,843] [INFO] [timer.py:197:stop] 0/9834, RunningAvgSamplesPerSec=6.340595410168788, CurrSamplesPerSec=5.699328294212952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:40:10,152] [INFO] [timer.py:197:stop] 0/9836, RunningAvgSamplesPerSec=6.3405977092798675, CurrSamplesPerSec=5.733189045775647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:40:21,517] [INFO] [timer.py:197:stop] 0/9838, RunningAvgSamplesPerSec=6.34059512603362, CurrSamplesPerSec=5.709437866507493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:40:32,842] [INFO] [logging.py:68:log_dist] [Rank 0] step=4920, skipped=5, lr=[1.911111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:40:32,844] [INFO] [timer.py:197:stop] 0/9840, RunningAvgSamplesPerSec=6.3405952053865615, CurrSamplesPerSec=5.710939936974358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:40:44,186] [INFO] [timer.py:197:stop] 0/9842, RunningAvgSamplesPerSec=6.340596580447692, CurrSamplesPerSec=5.737068135868828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:40:55,521] [INFO] [timer.py:197:stop] 0/9844, RunningAvgSamplesPerSec=6.340598206431438, CurrSamplesPerSec=5.705499021755187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:41:06,857] [INFO] [timer.py:197:stop] 0/9846, RunningAvgSamplesPerSec=6.340600054430399, CurrSamplesPerSec=5.7160389692080775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:41:18,208] [INFO] [timer.py:197:stop] 0/9848, RunningAvgSamplesPerSec=6.340598016578841, CurrSamplesPerSec=5.6954347996193135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:41:29,554] [INFO] [timer.py:197:stop] 0/9850, RunningAvgSamplesPerSec=6.340597950058121, CurrSamplesPerSec=5.6924349346684275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.8e-07, 'epoch': 20.87} +[2022-12-17 13:41:40,896] [INFO] [timer.py:197:stop] 0/9852, RunningAvgSamplesPerSec=6.340596690920647, CurrSamplesPerSec=5.671698993757138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:41:52,145] [INFO] [timer.py:197:stop] 0/9854, RunningAvgSamplesPerSec=6.3406011234994395, CurrSamplesPerSec=5.714989233011362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:42:03,433] [INFO] [timer.py:197:stop] 0/9856, RunningAvgSamplesPerSec=6.340602982802754, CurrSamplesPerSec=5.727709096073513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:42:14,717] [INFO] [timer.py:197:stop] 0/9858, RunningAvgSamplesPerSec=6.340604893504226, CurrSamplesPerSec=5.727854779225058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:42:26,001] [INFO] [logging.py:68:log_dist] [Rank 0] step=4930, skipped=5, lr=[1.6888888888888888e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:42:26,003] [INFO] [timer.py:197:stop] 0/9860, RunningAvgSamplesPerSec=6.3406086560517485, CurrSamplesPerSec=5.7084192017256035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:42:37,376] [INFO] [timer.py:197:stop] 0/9862, RunningAvgSamplesPerSec=6.340601733325555, CurrSamplesPerSec=5.652896269703396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:42:48,699] [INFO] [timer.py:197:stop] 0/9864, RunningAvgSamplesPerSec=6.340598766994051, CurrSamplesPerSec=5.678477086373248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:00,014] [INFO] [timer.py:197:stop] 0/9866, RunningAvgSamplesPerSec=6.340596977908883, CurrSamplesPerSec=5.697784915718732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:11,313] [INFO] [timer.py:197:stop] 0/9868, RunningAvgSamplesPerSec=6.34059902107909, CurrSamplesPerSec=5.710696218920705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:22,574] [INFO] [timer.py:197:stop] 0/9870, RunningAvgSamplesPerSec=6.340604113495977, CurrSamplesPerSec=5.732323956868721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:33,938] [INFO] [timer.py:197:stop] 0/9872, RunningAvgSamplesPerSec=6.340597750596586, CurrSamplesPerSec=5.650902772471108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:45,269] [INFO] [timer.py:197:stop] 0/9874, RunningAvgSamplesPerSec=6.340595945296254, CurrSamplesPerSec=5.690554355105167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:43:56,585] [INFO] [timer.py:197:stop] 0/9876, RunningAvgSamplesPerSec=6.3405946051103, CurrSamplesPerSec=5.69468689277867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:44:07,900] [INFO] [timer.py:197:stop] 0/9878, RunningAvgSamplesPerSec=6.340593489179957, CurrSamplesPerSec=5.694379087707014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:44:19,150] [INFO] [logging.py:68:log_dist] [Rank 0] step=4940, skipped=5, lr=[1.4666666666666668e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:44:19,151] [INFO] [timer.py:197:stop] 0/9880, RunningAvgSamplesPerSec=6.340597604830494, CurrSamplesPerSec=5.716830481351049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:44:30,454] [INFO] [timer.py:197:stop] 0/9882, RunningAvgSamplesPerSec=6.340596721036361, CurrSamplesPerSec=5.6957004202676496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:44:41,769] [INFO] [timer.py:197:stop] 0/9884, RunningAvgSamplesPerSec=6.340594316788758, CurrSamplesPerSec=5.682323846460689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:44:53,052] [INFO] [timer.py:197:stop] 0/9886, RunningAvgSamplesPerSec=6.340596276162941, CurrSamplesPerSec=5.7212739295736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:45:04,379] [INFO] [timer.py:197:stop] 0/9888, RunningAvgSamplesPerSec=6.34059408455692, CurrSamplesPerSec=5.689828473058532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:45:15,650] [INFO] [timer.py:197:stop] 0/9890, RunningAvgSamplesPerSec=6.340597297284146, CurrSamplesPerSec=5.721333680717065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:45:26,950] [INFO] [timer.py:197:stop] 0/9892, RunningAvgSamplesPerSec=6.340595498450684, CurrSamplesPerSec=5.704597655422528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:45:38,258] [INFO] [timer.py:197:stop] 0/9894, RunningAvgSamplesPerSec=6.340593335221015, CurrSamplesPerSec=5.693914543849568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:45:49,551] [INFO] [timer.py:197:stop] 0/9896, RunningAvgSamplesPerSec=6.340594562498777, CurrSamplesPerSec=5.721465869313914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:46:00,863] [INFO] [timer.py:197:stop] 0/9898, RunningAvgSamplesPerSec=6.340592944529698, CurrSamplesPerSec=5.69145056464072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:46:12,155] [INFO] [logging.py:68:log_dist] [Rank 0] step=4950, skipped=5, lr=[1.2444444444444446e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:46:12,157] [INFO] [timer.py:197:stop] 0/9900, RunningAvgSamplesPerSec=6.340591871089729, CurrSamplesPerSec=5.701376454174018, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.2444444444444446e-07, 'epoch': 20.97} +[2022-12-17 13:46:23,459] [INFO] [timer.py:197:stop] 0/9902, RunningAvgSamplesPerSec=6.340590961739624, CurrSamplesPerSec=5.694413394002929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:46:34,742] [INFO] [timer.py:197:stop] 0/9904, RunningAvgSamplesPerSec=6.340592714523383, CurrSamplesPerSec=5.719328187867814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:46:46,073] [INFO] [timer.py:197:stop] 0/9906, RunningAvgSamplesPerSec=6.340586601472477, CurrSamplesPerSec=5.6506555873434285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:46:57,649] [INFO] [timer.py:197:stop] 0/9908, RunningAvgSamplesPerSec=6.340583139550562, CurrSamplesPerSec=5.688344477258061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:47:08,964] [INFO] [timer.py:197:stop] 0/9910, RunningAvgSamplesPerSec=6.340582063583655, CurrSamplesPerSec=5.69384062966408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:47:17,445] [INFO] [timer.py:197:stop] 0/9912, RunningAvgSamplesPerSec=6.340896979420159, CurrSamplesPerSec=10.21189842496566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:47:28,730] [INFO] [timer.py:197:stop] 0/9914, RunningAvgSamplesPerSec=6.3409001843020745, CurrSamplesPerSec=5.709035213714303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:47:40,043] [INFO] [timer.py:197:stop] 0/9916, RunningAvgSamplesPerSec=6.340900307056376, CurrSamplesPerSec=5.717296093178765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:47:51,324] [INFO] [timer.py:197:stop] 0/9918, RunningAvgSamplesPerSec=6.340903282977045, CurrSamplesPerSec=5.73046219594243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:02,669] [INFO] [logging.py:68:log_dist] [Rank 0] step=4960, skipped=5, lr=[1.0222222222222224e-07], mom=[[0.9, 0.999]] +[2022-12-17 13:48:02,671] [INFO] [timer.py:197:stop] 0/9920, RunningAvgSamplesPerSec=6.340898328388526, CurrSamplesPerSec=5.660043561397479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:13,952] [INFO] [timer.py:197:stop] 0/9922, RunningAvgSamplesPerSec=6.340898483784882, CurrSamplesPerSec=5.702508899964778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:25,254] [INFO] [timer.py:197:stop] 0/9924, RunningAvgSamplesPerSec=6.340898390789046, CurrSamplesPerSec=5.700757735356679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:36,548] [INFO] [timer.py:197:stop] 0/9926, RunningAvgSamplesPerSec=6.340896976319998, CurrSamplesPerSec=5.693027458782009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:47,837] [INFO] [timer.py:197:stop] 0/9928, RunningAvgSamplesPerSec=6.340898333657151, CurrSamplesPerSec=5.70214695287267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:48:59,116] [INFO] [timer.py:197:stop] 0/9930, RunningAvgSamplesPerSec=6.340901462526044, CurrSamplesPerSec=5.718285282932952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:49:10,404] [INFO] [timer.py:197:stop] 0/9932, RunningAvgSamplesPerSec=6.3409020831662275, CurrSamplesPerSec=5.71534161709944, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:49:21,711] [INFO] [timer.py:197:stop] 0/9934, RunningAvgSamplesPerSec=6.340902295046757, CurrSamplesPerSec=5.712443041122582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:49:32,997] [INFO] [timer.py:197:stop] 0/9936, RunningAvgSamplesPerSec=6.340904588484402, CurrSamplesPerSec=5.711998395744774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:49:44,255] [INFO] [timer.py:197:stop] 0/9938, RunningAvgSamplesPerSec=6.340910112012096, CurrSamplesPerSec=5.73411833383575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:49:55,502] [INFO] [logging.py:68:log_dist] [Rank 0] step=4970, skipped=5, lr=[8e-08], mom=[[0.9, 0.999]] +[2022-12-17 13:49:55,503] [INFO] [timer.py:197:stop] 0/9940, RunningAvgSamplesPerSec=6.3409167332964005, CurrSamplesPerSec=5.74774976626621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:50:06,797] [INFO] [timer.py:197:stop] 0/9942, RunningAvgSamplesPerSec=6.340917796579706, CurrSamplesPerSec=5.698404682909532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:50:18,103] [INFO] [timer.py:197:stop] 0/9944, RunningAvgSamplesPerSec=6.34091793494103, CurrSamplesPerSec=5.704016295522513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:50:29,353] [INFO] [timer.py:197:stop] 0/9946, RunningAvgSamplesPerSec=6.340923511825759, CurrSamplesPerSec=5.728178681911909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:50:40,607] [INFO] [timer.py:197:stop] 0/9948, RunningAvgSamplesPerSec=6.340929224906553, CurrSamplesPerSec=5.731069760153105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:50:51,889] [INFO] [timer.py:197:stop] 0/9950, RunningAvgSamplesPerSec=6.340932002980195, CurrSamplesPerSec=5.713992669089812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 6.888888888888889e-08, 'epoch': 21.08} +[2022-12-17 13:51:03,181] [INFO] [timer.py:197:stop] 0/9952, RunningAvgSamplesPerSec=6.340932461337423, CurrSamplesPerSec=5.707336586028797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:51:14,507] [INFO] [timer.py:197:stop] 0/9954, RunningAvgSamplesPerSec=6.340929661883157, CurrSamplesPerSec=5.705845627693179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:51:25,809] [INFO] [timer.py:197:stop] 0/9956, RunningAvgSamplesPerSec=6.340930317921198, CurrSamplesPerSec=5.708943422655863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:51:37,140] [INFO] [timer.py:197:stop] 0/9958, RunningAvgSamplesPerSec=6.340929734921853, CurrSamplesPerSec=5.696032299621633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:51:48,475] [INFO] [logging.py:68:log_dist] [Rank 0] step=4980, skipped=5, lr=[5.777777777777778e-08], mom=[[0.9, 0.999]] +[2022-12-17 13:51:48,477] [INFO] [timer.py:197:stop] 0/9960, RunningAvgSamplesPerSec=6.340925350607312, CurrSamplesPerSec=5.688403060397394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:00,005] [INFO] [timer.py:197:stop] 0/9962, RunningAvgSamplesPerSec=6.3409241755697, CurrSamplesPerSec=5.708690162635773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:11,338] [INFO] [timer.py:197:stop] 0/9964, RunningAvgSamplesPerSec=6.340920354302299, CurrSamplesPerSec=5.6829395326607095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:22,674] [INFO] [timer.py:197:stop] 0/9966, RunningAvgSamplesPerSec=6.340916614696622, CurrSamplesPerSec=5.698118973321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:34,000] [INFO] [timer.py:197:stop] 0/9968, RunningAvgSamplesPerSec=6.340914119441085, CurrSamplesPerSec=5.70057202483779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:45,330] [INFO] [timer.py:197:stop] 0/9970, RunningAvgSamplesPerSec=6.340909146581982, CurrSamplesPerSec=5.697504105417921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:52:56,611] [INFO] [timer.py:197:stop] 0/9972, RunningAvgSamplesPerSec=6.340909995964699, CurrSamplesPerSec=5.709983651268353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:53:07,892] [INFO] [timer.py:197:stop] 0/9974, RunningAvgSamplesPerSec=6.34090860048114, CurrSamplesPerSec=5.708770776043156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:53:19,190] [INFO] [timer.py:197:stop] 0/9976, RunningAvgSamplesPerSec=6.340909069212601, CurrSamplesPerSec=5.708913797600395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:53:30,496] [INFO] [timer.py:197:stop] 0/9978, RunningAvgSamplesPerSec=6.340909470341716, CurrSamplesPerSec=5.7091665917667935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:53:41,802] [INFO] [logging.py:68:log_dist] [Rank 0] step=4990, skipped=5, lr=[3.555555555555556e-08], mom=[[0.9, 0.999]] +[2022-12-17 13:53:41,804] [INFO] [timer.py:197:stop] 0/9980, RunningAvgSamplesPerSec=6.340909563247144, CurrSamplesPerSec=5.706446284646033, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:53:53,132] [INFO] [timer.py:197:stop] 0/9982, RunningAvgSamplesPerSec=6.340906959207517, CurrSamplesPerSec=5.699439864073999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:54:04,416] [INFO] [timer.py:197:stop] 0/9984, RunningAvgSamplesPerSec=6.3409092895430135, CurrSamplesPerSec=5.729085559804991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:54:15,670] [INFO] [timer.py:197:stop] 0/9986, RunningAvgSamplesPerSec=6.34091422515384, CurrSamplesPerSec=5.727353230141937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:54:26,952] [INFO] [timer.py:197:stop] 0/9988, RunningAvgSamplesPerSec=6.340917488526028, CurrSamplesPerSec=5.715415360428148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:54:38,232] [INFO] [timer.py:197:stop] 0/9990, RunningAvgSamplesPerSec=6.340920287446701, CurrSamplesPerSec=5.71633670397908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:54:49,484] [INFO] [timer.py:197:stop] 0/9992, RunningAvgSamplesPerSec=6.340926020804758, CurrSamplesPerSec=5.750438155929827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:55:00,765] [INFO] [timer.py:197:stop] 0/9994, RunningAvgSamplesPerSec=6.340926852172472, CurrSamplesPerSec=5.695170895359499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:55:12,071] [INFO] [timer.py:197:stop] 0/9996, RunningAvgSamplesPerSec=6.340927324768952, CurrSamplesPerSec=5.710692574242281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:55:23,326] [INFO] [timer.py:197:stop] 0/9998, RunningAvgSamplesPerSec=6.340931445461435, CurrSamplesPerSec=5.722994042135287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 13:55:34,598] [INFO] [logging.py:68:log_dist] [Rank 0] step=5000, skipped=5, lr=[1.3333333333333334e-08], mom=[[0.9, 0.999]] +[2022-12-17 13:55:34,599] [INFO] [timer.py:197:stop] 0/10000, RunningAvgSamplesPerSec=6.340933607515229, CurrSamplesPerSec=5.7027141206342025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.3333333333333334e-08, 'epoch': 21.19} +{'eval_loss': 0.2120361328125, 'eval_wer': 9.045873924973758, 'eval_runtime': 2098.0347, 'eval_samples_per_second': 3.677, 'eval_steps_per_second': 0.46, 'epoch': 21.19} +[2022-12-17 14:30:36,207] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step5000 is begin to save! +[2022-12-17 14:30:36,217] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt +[2022-12-17 14:30:36,217] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt... +[2022-12-17 14:30:39,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt. +[2022-12-17 14:30:39,891] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-17 14:30:54,827] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-17 14:30:54,828] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-17 14:30:54,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now!