Update README.md
Browse files
README.md
CHANGED
@@ -68,7 +68,7 @@ The model is trained on 8 A100 80GB GPUs for approximately 15 hours.
|
|
68 |
| per_device_train_batch_size | 2 |
|
69 |
| gradient_accumulation_steps | 1 |
|
70 |
| epoch | 3 |
|
71 |
-
| steps |
|
72 |
| learning_rate | 2e-5 |
|
73 |
| lr scheduler type | cosine |
|
74 |
| warmup ratio | 0.1 |
|
|
|
68 |
| per_device_train_batch_size | 2 |
|
69 |
| gradient_accumulation_steps | 1 |
|
70 |
| epoch | 3 |
|
71 |
+
| steps | 34503 |
|
72 |
| learning_rate | 2e-5 |
|
73 |
| lr scheduler type | cosine |
|
74 |
| warmup ratio | 0.1 |
|