{
  "batch_size": 10,
  "block_size": 512,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 8,
  "dropout": 0.18,
  "norm_eps": 1e-5,
  "learning_rate": 3e-5
}
{
  "batch_size": 10,
  "block_size": 512,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 8,
  "dropout": 0.18,
  "norm_eps": 1e-5,
  "learning_rate": 3e-5
}